Example #1
    def __init__(self, message=None, **kwargs):
        self.kwargs = kwargs

        if 'code' not in self.kwargs:
            try:
                self.kwargs['code'] = self.code
            except AttributeError:
                pass

        if not message:
            try:
                message = self.msg_fmt % kwargs
            except Exception:
                # kwargs doesn't match a variable in msg_fmt
                # log the issue and the kwargs
                LOG.exception(_LE('Exception in string format operation'))
                for name, value in kwargs.items():
                    LOG.error(_LE("%(name)s: %(value)s"),
                              {'name': name, 'value': value})

                if CONF.fatal_exception_format_errors:
                    raise
                else:
                    # at least get the core msg_fmt out if something happened
                    message = self.msg_fmt

        super(WatcherException, self).__init__(message)
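Example #1 builds the exception message by interpolating the class-level msg_fmt template with the keyword arguments passed to the constructor, and falls back to the raw template when the kwargs do not match. Below is a minimal, self-contained sketch of that pattern; the class names (BaseError, InstanceNotFound) and the simplified fallback are assumptions for illustration, not Watcher code.

class BaseError(Exception):
    msg_fmt = "An unknown error occurred"

    def __init__(self, message=None, **kwargs):
        self.kwargs = kwargs
        if 'code' not in self.kwargs and hasattr(self, 'code'):
            self.kwargs['code'] = self.code
        if not message:
            try:
                message = self.msg_fmt % kwargs
            except KeyError:
                # kwargs do not match the template; keep the raw template
                message = self.msg_fmt
        super(BaseError, self).__init__(message)


class InstanceNotFound(BaseError):
    msg_fmt = "Instance %(instance)s could not be found"
    code = 404


print(InstanceNotFound(instance='abc-123'))
# Instance abc-123 could not be found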
Example #2
    def __init__(self, message=None, **kwargs):
        self.kwargs = kwargs

        if 'code' not in self.kwargs:
            try:
                self.kwargs['code'] = self.code
            except AttributeError:
                pass

        if not message:
            try:
                message = self.msg_fmt % kwargs
            except Exception as e:
                # kwargs doesn't match a variable in msg_fmt
                # log the issue and the kwargs
                LOG.exception(_LE('Exception in string format operation'))
                for name, value in kwargs.items():
                    LOG.error(_LE("%(name)s: %(value)s"),
                              {'name': name, 'value': value})

                if CONF.fatal_exception_format_errors:
                    raise e
                else:
                    # at least get the core msg_fmt out if something happened
                    message = self.msg_fmt

        super(WatcherException, self).__init__(message)
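Example #2 is identical to Example #1 except that it re-raises with raise e instead of a bare raise. A bare raise re-raises the exception currently being handled with its traceback intact, while raise e re-raises the bound name (and, on Python 2, restarts the traceback at the re-raise site). A small stand-alone illustration, not taken from Watcher:

def reraise_bare():
    try:
        {}['missing']
    except KeyError:
        raise      # the original KeyError propagates, traceback intact


def reraise_bound():
    try:
        {}['missing']
    except KeyError as e:
        raise e    # same exception object; on Python 2 the traceback now
                   # starts here instead of at the failing lookup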
Example #3
    def execute(self, original_model):
        LOG.debug("Initializing Outlet temperature strategy")

        if original_model is None:
            raise wexc.ClusterStateNotDefined()

        current_model = original_model
        hosts_need_release, hosts_target = self.group_hosts_by_outlet_temp(
            current_model)

        if len(hosts_need_release) == 0:
            # TODO(zhenzanz): return something appropriate
            # if there are no hot servers
            LOG.debug("No hosts require optimization")
            return self.solution

        if len(hosts_target) == 0:
            LOG.warning(_LE("No hosts under outlet temp threshold found"))
            return self.solution

        # choose the server with highest outlet t
        hosts_need_release = sorted(hosts_need_release,
                                    reverse=True,
                                    key=lambda x: (x["outlet_temp"]))

        vm_to_migrate = self.choose_vm_to_migrate(current_model,
                                                  hosts_need_release)
        # calculate the vm's cpu cores, memory and disk needs
        if vm_to_migrate is None:
            return self.solution

        mig_src_hypervisor, vm_src = vm_to_migrate
        dest_servers = self.filter_dest_servers(current_model, hosts_target,
                                                vm_src)
        # sort the filtered result by outlet temp
        # pick up the lowest one as dest server
        if len(dest_servers) == 0:
            # TODO(zhenzanz): maybe warn that there's no resource
            # for the instance.
            LOG.info(_LE("No proper target host could be found"))
            return self.solution

        dest_servers = sorted(dest_servers, key=lambda x: (x["outlet_temp"]))
        # always use the host with lowest outlet temperature
        mig_dst_hypervisor = dest_servers[0]['hv']
        # generate solution to migrate the vm to the dest server,
        if current_model.get_mapping().migrate_vm(vm_src, mig_src_hypervisor,
                                                  mig_dst_hypervisor):
            parameters = {
                'migration_type': 'live',
                'src_hypervisor': mig_src_hypervisor.uuid,
                'dst_hypervisor': mig_dst_hypervisor.uuid
            }
            self.solution.add_action(action_type=self.MIGRATION,
                                     resource_id=vm_src.uuid,
                                     input_parameters=parameters)

        self.solution.model = current_model

        return self.solution
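Example #3 (and its reformatted duplicate, Example #4 below) selects the migration source by sorting the hot hosts by outlet temperature in descending order and the destination by sorting the remaining hosts in ascending order. A self-contained sketch of that selection, using invented data shaped like the hvmap dicts built by group_hosts_by_outlet_temp():

hosts_need_release = [
    {'hv': 'node-1', 'outlet_temp': 38.5},
    {'hv': 'node-2', 'outlet_temp': 41.0},
]
hosts_target = [
    {'hv': 'node-3', 'outlet_temp': 24.0},
    {'hv': 'node-4', 'outlet_temp': 27.5},
]

hottest = sorted(hosts_need_release, reverse=True,
                 key=lambda x: x['outlet_temp'])[0]
coolest = sorted(hosts_target, key=lambda x: x['outlet_temp'])[0]
print(hottest['hv'], '->', coolest['hv'])   # node-2 -> node-3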
Example #4
    def execute(self, original_model):
        LOG.debug("Initializing Outlet temperature strategy")

        if original_model is None:
            raise wexc.ClusterStateNotDefined()

        current_model = original_model
        hosts_need_release, hosts_target = self.group_hosts_by_outlet_temp(
            current_model)

        if len(hosts_need_release) == 0:
            # TODO(zhenzanz): return something appropriate
            # if there are no hot servers
            LOG.debug("No hosts require optimization")
            return self.solution

        if len(hosts_target) == 0:
            LOG.warning(_LE("No hosts under outlet temp threshold found"))
            return self.solution

        # choose the server with highest outlet t
        hosts_need_release = sorted(hosts_need_release,
                                    reverse=True,
                                    key=lambda x: (x["outlet_temp"]))

        vm_to_migrate = self.choose_vm_to_migrate(current_model,
                                                  hosts_need_release)
        # calculate the vm's cpu cores, memory and disk needs
        if vm_to_migrate is None:
            return self.solution

        mig_src_hypervisor, vm_src = vm_to_migrate
        dest_servers = self.filter_dest_servers(current_model,
                                                hosts_target,
                                                vm_src)
        # sort the filtered result by outlet temp
        # pick up the lowest one as dest server
        if len(dest_servers) == 0:
            # TODO(zhenzanz): maybe warn that there's no resource
            # for the instance.
            LOG.info(_LE("No proper target host could be found"))
            return self.solution

        dest_servers = sorted(dest_servers, key=lambda x: (x["outlet_temp"]))
        # always use the host with lowest outlet temperature
        mig_dst_hypervisor = dest_servers[0]['hv']
        # generate solution to migrate the vm to the dest server,
        if current_model.get_mapping().migrate_vm(vm_src,
                                                  mig_src_hypervisor,
                                                  mig_dst_hypervisor):
            parameters = {'migration_type': 'live',
                          'src_hypervisor': mig_src_hypervisor.uuid,
                          'dst_hypervisor': mig_dst_hypervisor.uuid}
            self.solution.add_action(action_type=self.MIGRATION,
                                     resource_id=vm_src.uuid,
                                     input_parameters=parameters)

        self.solution.model = current_model

        return self.solution
Example #5
    def choose_instance_to_migrate(self, hosts):
        """Pick up an active instance instance to migrate from provided hosts

        :param hosts: the array of dict which contains node object
        """
        instances_tobe_migrate = []
        for nodemap in hosts:
            source_node = nodemap['node']
            source_instances = self.compute_model.mapping.get_node_instances(
                source_node)
            if source_instances:
                inlet_t = self.ceilometer.statistic_aggregation(
                    resource_id=source_node.uuid,
                    meter_name=self.meter_name_inlet_t,
                    period=self._period,
                    aggregate='avg')
                power = self.ceilometer.statistic_aggregation(
                    resource_id=source_node.uuid,
                    meter_name=self.meter_name_power,
                    period=self._period,
                    aggregate='avg')
                if (power < self.threshold_power and
                        inlet_t < self.threshold_inlet_t):
                    # hardware issue, migrate all instances from this node
                    for instance_id in source_instances:
                        try:
                            instance = (self.compute_model.
                                        get_instance_from_id(instance_id))
                            instances_tobe_migrate.append(instance)
                        except wexc.InstanceNotFound:
                            LOG.error(_LE("Instance not found; error: %s"),
                                      instance_id)
                    return source_node, instances_tobe_migrate
                else:
                    # migrate the first active instance
                    for instance_id in source_instances:
                        try:
                            instance = (self.compute_model.
                                        get_instance_from_id(instance_id))
                            if (instance.state !=
                                    element.InstanceState.ACTIVE.value):
                                LOG.info(
                                    _LI("Instance not active, skipped: %s"),
                                    instance.uuid)
                                continue
                            instances_tobe_migrate.append(instance)
                            return source_node, instances_tobe_migrate
                        except wexc.InstanceNotFound:
                            LOG.error(_LE("Instance not found; error: %s"),
                                      instance_id)
            else:
                LOG.info(_LI("Instance not found on node: %s"),
                         source_node.uuid)
Example #6
    def choose_instance_to_migrate(self, hosts):
        """Pick up an active instance instance to migrate from provided hosts

        :param hosts: the array of dict which contains node object
        """
        instances_tobe_migrate = []
        for nodemap in hosts:
            source_node = nodemap['node']
            source_instances = self.compute_model.mapping.get_node_instances(
                source_node)
            if source_instances:
                inlet_t = self.ceilometer.statistic_aggregation(
                    resource_id=source_node.uuid,
                    meter_name=self.meter_name_inlet_t,
                    period=self._period,
                    aggregate='avg')
                power = self.ceilometer.statistic_aggregation(
                    resource_id=source_node.uuid,
                    meter_name=self.meter_name_power,
                    period=self._period,
                    aggregate='avg')
                if (power < self.threshold_power and
                        inlet_t < self.threshold_inlet_t):
                    # hardware issue, migrate all instances from this node
                    for instance_id in source_instances:
                        try:
                            instance = (self.compute_model.
                                        get_instance_by_uuid(instance_id))
                            instances_tobe_migrate.append(instance)
                        except wexc.InstanceNotFound:
                            LOG.error(_LE("Instance not found; error: %s"),
                                      instance_id)
                    return source_node, instances_tobe_migrate
                else:
                    # migrate the first active instance
                    for instance_id in source_instances:
                        try:
                            instance = (self.compute_model.
                                        get_instance_by_uuid(instance_id))
                            if (instance.state !=
                                    element.InstanceState.ACTIVE.value):
                                LOG.info(
                                    _LI("Instance not active, skipped: %s"),
                                    instance.uuid)
                                continue
                            instances_tobe_migrate.append(instance)
                            return source_node, instances_tobe_migrate
                        except wexc.InstanceNotFound:
                            LOG.error(_LE("Instance not found; error: %s"),
                                      instance_id)
            else:
                LOG.info(_LI("Instance not found on node: %s"),
                         source_node.uuid)
Example #7
    def calculate_score_instance(self, instance):
        """Calculate Score of virtual machine

        :param instance: the virtual machine
        :return: score
        """
        instance_cpu_utilization = self.ceilometer. \
            statistic_aggregation(
                resource_id=instance.uuid,
                meter_name=self.INSTANCE_CPU_USAGE_METRIC_NAME,
                period="7200",
                aggregate='avg'
            )
        if instance_cpu_utilization is None:
            LOG.error(
                _LE("No values returned by %(resource_id)s "
                    "for %(metric_name)s") %
                dict(resource_id=instance.uuid,
                     metric_name=self.INSTANCE_CPU_USAGE_METRIC_NAME))
            instance_cpu_utilization = 100

        cpu_capacity = self.compute_model.get_resource_by_uuid(
            element.ResourceType.cpu_cores).get_capacity(instance)

        total_cores_used = cpu_capacity * (instance_cpu_utilization / 100.0)

        return self.calculate_weight(instance, total_cores_used, 0, 0)
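Example #7 (and the node-level variant in Example #8 below) turns a relative CPU utilisation sample into an absolute "cores used" figure before weighting it; when Ceilometer returns no data, the utilisation is pinned to 100 so the unknown workload is treated as fully loaded. The arithmetic in isolation, with invented numbers:

def cores_used(cpu_capacity, cpu_utilization):
    if cpu_utilization is None:
        cpu_utilization = 100   # no data: assume the worst case, as above
    return cpu_capacity * (cpu_utilization / 100.0)


print(cores_used(8, 35.0))   # 2.8
print(cores_used(8, None))   # 8.0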
Example #8
    def calculate_score_node(self, node):
        """Calculate the score that represent the utilization level

        :param node: :py:class:`~.ComputeNode` instance
        :return: Score for the given compute node
        :rtype: float
        """
        resource_id = "%s_%s" % (node.uuid, node.hostname)
        host_avg_cpu_util = self.ceilometer.statistic_aggregation(
            resource_id=resource_id,
            meter_name=self.HOST_CPU_USAGE_METRIC_NAME,
            period="7200",
            aggregate='avg')

        if host_avg_cpu_util is None:
            LOG.error(
                _LE("No values returned by %(resource_id)s "
                    "for %(metric_name)s") %
                dict(resource_id=resource_id,
                     metric_name=self.HOST_CPU_USAGE_METRIC_NAME))
            host_avg_cpu_util = 100

        cpu_capacity = self.compute_model.get_resource_by_uuid(
            element.ResourceType.cpu_cores).get_capacity(node)

        total_cores_used = cpu_capacity * (host_avg_cpu_util / 100.0)

        return self.calculate_weight(node, total_cores_used, 0, 0)
Example #9
File: sd.py Project: XroLLla/watcher
    def get_vm_load(self, vm_uuid):
        """
        Gather vm load through ceilometer statistic.

        :param vm_uuid: vm for which statistic is gathered.
        :return: dict
        """
        LOG.warning('get_vm_load started')
        flavor_id = self.nova.servers.get(vm_uuid).flavor['id']
        vm_vcpus = self.nova.flavors.get(flavor_id).vcpus
        vm_load = {'uuid': vm_uuid, 'vcpus': vm_vcpus}
        for meter in self.metrics:
            avg_meter = self.ceilometer.statistic_aggregation(
                            resource_id=vm_uuid,
                            meter_name=meter,
                            period="120",
                            aggregate='avg'
                            )
            if avg_meter is None:
                LOG.error(
                    _LE("No values returned by %(resource_id)s "
                        "for %(metric_name)s"),
                    {'resource_id': vm_uuid, 'metric_name': meter},
                )
            vm_load[meter] = avg_meter if avg_meter else 0
        return vm_load
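The logger calls in these examples pass the translated format string first and the interpolation values as a single dict, so the %(resource_id)s placeholders are only rendered if the record is actually emitted. The same convention works with the plain stdlib logger, as in this stand-alone sketch (all names are illustrative):

import logging

logging.basicConfig(level=logging.ERROR)
LOG = logging.getLogger(__name__)

vm_uuid = 'vm-1'
meter = 'cpu_util'
LOG.error("No values returned by %(resource_id)s for %(metric_name)s",
          {'resource_id': vm_uuid, 'metric_name': meter})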
Example #10
    def calculate_score_node(self, hypervisor, model):
        """calculate the score that represent the utilization level

            :param hypervisor:
            :param model:
            :return:
            """
        resource_id = "%s_%s" % (hypervisor.uuid, hypervisor.hostname)
        vm_avg_cpu_util = self.ceilometer. \
            statistic_aggregation(resource_id=resource_id,
                                  meter_name=self.HOST_CPU_USAGE_METRIC_NAME,
                                  period="7200",
                                  aggregate='avg'
                                  )
        if vm_avg_cpu_util is None:
            LOG.error(
                _LE("No values returned by %(resource_id)s "
                    "for %(metric_name)s"),
                {'resource_id': resource_id,
                 'metric_name': self.HOST_CPU_USAGE_METRIC_NAME},
            )
            vm_avg_cpu_util = 100

        cpu_capacity = model.get_resource_from_id(
            resource.ResourceType.cpu_cores).get_capacity(hypervisor)

        total_cores_used = cpu_capacity * (vm_avg_cpu_util / 100)

        return self.calculate_weight(model, hypervisor, total_cores_used, 0, 0)
Example #11
    def calculate_score_vm(self, vm, cluster_data_model):
        """Calculate Score of virtual machine

        :param vm: the virtual machine
        :param cluster_data_model: the cluster model
        :return: score
        """
        if cluster_data_model is None:
            raise exception.ClusterStateNotDefined()

        vm_cpu_utilization = self.ceilometer. \
            statistic_aggregation(
                resource_id=vm.uuid,
                meter_name=self.INSTANCE_CPU_USAGE_METRIC_NAME,
                period="7200",
                aggregate='avg'
            )
        if vm_cpu_utilization is None:
            LOG.error(
                _LE("No values returned by %(resource_id)s "
                    "for %(metric_name)s"),
                {'resource_id': vm.uuid,
                 'metric_name': self.INSTANCE_CPU_USAGE_METRIC_NAME},
            )
            vm_cpu_utilization = 100

        cpu_capacity = cluster_data_model.get_resource_from_id(
            resource.ResourceType.cpu_cores).get_capacity(vm)

        total_cores_used = cpu_capacity * (vm_cpu_utilization / 100.0)

        return self.calculate_weight(cluster_data_model, vm, total_cores_used,
                                     0, 0)
Example #12
    def group_hosts_by_outlet_temp(self, cluster_data_model):
        """Group hosts based on outlet temp meters"""

        hypervisors = cluster_data_model.get_all_hypervisors()
        size_cluster = len(hypervisors)
        if size_cluster == 0:
            raise wexc.ClusterEmpty()

        hosts_need_release = []
        hosts_target = []
        for hypervisor_id in hypervisors:
            hypervisor = cluster_data_model.get_hypervisor_from_id(
                hypervisor_id)
            resource_id = hypervisor.uuid

            outlet_temp = self.ceilometer.statistic_aggregation(
                resource_id=resource_id,
                meter_name=self._meter,
                period="30",
                aggregate='avg')
            # some hosts may not have outlet temp meters, remove from target
            if outlet_temp is None:
                LOG.warning(_LE("%s: no outlet temp data"), resource_id)
                continue

            LOG.debug("%s: outlet temperature %f" % (resource_id, outlet_temp))
            hvmap = {'hv': hypervisor, 'outlet_temp': outlet_temp}
            if outlet_temp >= self.threshold:
                # mark the hypervisor to release resources
                hosts_need_release.append(hvmap)
            else:
                hosts_target.append(hvmap)
        return hosts_need_release, hosts_target
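Example #12 splits the cluster into hosts that must shed load and hosts that can still receive it, silently skipping hosts that lack the meter. The split in isolation, with invented samples:

threshold = 35.0
samples = {'node-1': 38.5, 'node-2': 24.0, 'node-3': None}

hosts_need_release, hosts_target = [], []
for host, outlet_temp in samples.items():
    if outlet_temp is None:
        continue   # hosts without the meter are skipped, as above
    hvmap = {'hv': host, 'outlet_temp': outlet_temp}
    if outlet_temp >= threshold:
        hosts_need_release.append(hvmap)
    else:
        hosts_target.append(hvmap)

print(len(hosts_need_release), len(hosts_target))   # 1 1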
Example #13
    def calculate_score_node(self, node):
        """Calculate the score that represent the utilization level

        :param node: :py:class:`~.ComputeNode` instance
        :return: Score for the given compute node
        :rtype: float
        """
        resource_id = "%s_%s" % (node.uuid, node.hostname)
        host_avg_cpu_util = self.ceilometer. \
            statistic_aggregation(resource_id=resource_id,
                                  meter_name=self.HOST_CPU_USAGE_METRIC_NAME,
                                  period="7200",
                                  aggregate='avg')

        if host_avg_cpu_util is None:
            LOG.error(
                _LE("No values returned by %(resource_id)s "
                    "for %(metric_name)s"),
                {'resource_id': resource_id,
                 'metric_name': self.HOST_CPU_USAGE_METRIC_NAME},
            )
            host_avg_cpu_util = 100

        cpu_capacity = self.compute_model.get_resource_from_id(
            element.ResourceType.cpu_cores).get_capacity(node)

        total_cores_used = cpu_capacity * (host_avg_cpu_util / 100)

        return self.calculate_weight(node, total_cores_used, 0, 0)
Example #14
    def calculate_score_node(self, hypervisor):
        """Calculate the score that represent the utilization level

        :param hypervisor:
        :return:
        """
        resource_id = "%s_%s" % (hypervisor.uuid, hypervisor.hostname)
        host_avg_cpu_util = self.ceilometer. \
            statistic_aggregation(resource_id=resource_id,
                                  meter_name=self.HOST_CPU_USAGE_METRIC_NAME,
                                  period="7200",
                                  aggregate='avg')

        if host_avg_cpu_util is None:
            LOG.error(
                _LE("No values returned by %(resource_id)s "
                    "for %(metric_name)s"),
                {'resource_id': resource_id,
                 'metric_name': self.HOST_CPU_USAGE_METRIC_NAME},
            )
            host_avg_cpu_util = 100

        cpu_capacity = self.model.get_resource_from_id(
            resource.ResourceType.cpu_cores).get_capacity(hypervisor)

        total_cores_used = cpu_capacity * (host_avg_cpu_util / 100)

        return self.calculate_weight(hypervisor, total_cores_used, 0, 0)
Example #15
    def choose_vm_to_migrate(self, hosts, avg_workload, workload_cache):
        """Pick up an active vm instance to migrate from provided hosts

        :param hosts: the array of dict which contains hypervisor object
        :param avg_workload: the average workload value of all hypervisors
        :param workload_cache: the map contains vm to workload mapping
        """
        for hvmap in hosts:
            source_hypervisor = hvmap['hv']
            source_vms = self.model.get_mapping().get_node_vms(
                source_hypervisor)
            if source_vms:
                delta_workload = hvmap['workload'] - avg_workload
                min_delta = 1000000
                instance_id = None
                for vm_id in source_vms:
                    try:
                        # select the first active VM to migrate
                        vm = self.model.get_vm_from_id(vm_id)
                        if vm.state != vm_state.VMState.ACTIVE.value:
                            LOG.debug("VM not active; skipped: %s",
                                      vm.uuid)
                            continue
                        current_delta = delta_workload - workload_cache[vm_id]
                        if 0 <= current_delta < min_delta:
                            min_delta = current_delta
                            instance_id = vm_id
                    except wexc.InstanceNotFound:
                        LOG.error(_LE("VM not found; error: %s"), vm_id)
                if instance_id:
                    return source_hypervisor, self.model.get_vm_from_id(
                        instance_id)
            else:
                LOG.info(_LI("VM not found on hypervisor: %s"),
                         source_hypervisor.uuid)
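Example #15 picks, among the active VMs on an overloaded hypervisor, the one whose workload brings the host closest to the cluster average without dropping below it. The selection rule in isolation, with invented numbers:

delta_workload = 2.4          # host workload minus the cluster average
workload_cache = {'vm-1': 0.5, 'vm-2': 2.0, 'vm-3': 3.0}

best, min_delta = None, float('inf')
for vm_id, workload in workload_cache.items():
    current_delta = delta_workload - workload
    if 0 <= current_delta < min_delta:
        min_delta, best = current_delta, vm_id

print(best)   # vm-2 leaves the smallest non-negative remainder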
Example #16
File: base.py Project: akinsWin/watcher
    def obj_class_from_name(cls, objname, objver):
        """Returns a class from the registry based on a name and version."""
        if objname not in cls._obj_classes:
            LOG.error(
                _LE('Unable to instantiate unregistered object type '
                    '%(objtype)s'), dict(objtype=objname))
            raise exception.UnsupportedObjectError(objtype=objname)

        latest = None
        compatible_match = None
        for objclass in cls._obj_classes[objname]:
            if objclass.VERSION == objver:
                return objclass

            version_bits = tuple([int(x) for x in objclass.VERSION.split(".")])
            if latest is None:
                latest = version_bits
            elif latest < version_bits:
                latest = version_bits

            if versionutils.is_compatible(objver, objclass.VERSION):
                compatible_match = objclass

        if compatible_match:
            return compatible_match

        latest_ver = '%i.%i' % latest
        raise exception.IncompatibleObjectVersion(objname=objname,
                                                  objver=objver,
                                                  supported=latest_ver)
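Example #16 (and its duplicate, Example #20 below) tracks the newest registered version by comparing versions as tuples of integers, so that '1.10' correctly sorts after '1.9'. The comparison in isolation:

versions = ['1.0', '1.9', '1.10']
latest = None
for ver in versions:
    version_bits = tuple(int(x) for x in ver.split('.'))
    if latest is None or version_bits > latest:
        latest = version_bits

print('%i.%i' % latest)   # 1.10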
Example #17
    def group_hosts_by_cpu_util(self, model):
        """Calculate the workloads of each hypervisor

        try to find out the hypervisors which have reached threshold
        and the hypervisors which are under threshold.
        and also calculate the average workload value of all hypervisors.
        and also generate the VM workload map.
        """

        hypervisors = model.get_all_hypervisors()
        cluster_size = len(hypervisors)
        if not hypervisors:
            raise wexc.ClusterEmpty()
        # get cpu cores capacity of hypervisors and vms
        cap_cores = model.get_resource_from_id(resource.ResourceType.cpu_cores)
        overload_hosts = []
        nonoverload_hosts = []
        # total workload of cluster
        # it's the total core numbers being utilized in a cluster.
        cluster_workload = 0.0
        # use workload_cache to store the workload of VMs for reuse purpose
        workload_cache = {}
        for hypervisor_id in hypervisors:
            hypervisor = model.get_hypervisor_from_id(hypervisor_id)
            vms = model.get_mapping().get_node_vms(hypervisor)
            hypervisor_workload = 0.0
            for vm_id in vms:
                vm = model.get_vm_from_id(vm_id)
                try:
                    cpu_util = self.ceilometer.statistic_aggregation(
                        resource_id=vm_id,
                        meter_name=self._meter,
                        period=self._period,
                        aggregate='avg')
                except Exception as e:
                    LOG.error(_LE("Can not get cpu_util: %s"), e.message)
                    continue
                if cpu_util is None:
                    LOG.debug("%s: cpu_util is None", vm_id)
                    continue
                vm_cores = cap_cores.get_capacity(vm)
                workload_cache[vm_id] = cpu_util * vm_cores / 100
                hypervisor_workload += workload_cache[vm_id]
                LOG.debug("%s: cpu_util %f", vm_id, cpu_util)
            hypervisor_cores = cap_cores.get_capacity(hypervisor)
            hy_cpu_util = hypervisor_workload / hypervisor_cores * 100

            cluster_workload += hypervisor_workload

            hvmap = {'hv': hypervisor, "cpu_util": hy_cpu_util, 'workload':
                     hypervisor_workload}
            if hy_cpu_util >= self.threshold:
                # mark the hypervisor to release resources
                overload_hosts.append(hvmap)
            else:
                nonoverload_hosts.append(hvmap)

        avg_workload = cluster_workload / cluster_size

        return overload_hosts, nonoverload_hosts, avg_workload, workload_cache
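Example #17 sums per-VM workloads (cores used) into a per-hypervisor workload, converts that into a utilisation percentage against the hypervisor's core count, and averages the workload over the cluster. A reduced, self-contained sketch with invented data:

vm_workloads = {'hv-1': [0.8, 1.2], 'hv-2': [0.4]}   # cores used per VM
hv_cores = {'hv-1': 8, 'hv-2': 4}
threshold = 20.0                                     # percent

cluster_workload = 0.0
overload_hosts, nonoverload_hosts = [], []
for hv, loads in vm_workloads.items():
    hv_workload = sum(loads)
    cluster_workload += hv_workload
    hv_cpu_util = hv_workload / hv_cores[hv] * 100
    if hv_cpu_util >= threshold:
        overload_hosts.append(hv)
    else:
        nonoverload_hosts.append(hv)

avg_workload = cluster_workload / len(vm_workloads)
print(overload_hosts, nonoverload_hosts, avg_workload)
# ['hv-1'] ['hv-2'] 1.2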
Example #18
    def calculate_score_vm(self, vm):
        """Calculate Score of virtual machine

        :param vm: the virtual machine
        :param self.model: the cluster model
        :return: score
        """
        vm_cpu_utilization = self.ceilometer. \
            statistic_aggregation(
                resource_id=vm.uuid,
                meter_name=self.INSTANCE_CPU_USAGE_METRIC_NAME,
                period="7200",
                aggregate='avg'
            )
        if vm_cpu_utilization is None:
            LOG.error(
                _LE("No values returned by %(resource_id)s "
                    "for %(metric_name)s"),
                {'resource_id': vm.uuid,
                 'metric_name': self.INSTANCE_CPU_USAGE_METRIC_NAME},
            )
            vm_cpu_utilization = 100

        cpu_capacity = self.model.get_resource_from_id(
            resource.ResourceType.cpu_cores).get_capacity(vm)

        total_cores_used = cpu_capacity * (vm_cpu_utilization / 100.0)

        return self.calculate_weight(vm, total_cores_used, 0, 0)
Example #19
    def group_hosts_by_airflow(self):
        """Group hosts based on airflow meters"""

        hypervisors = self.model.get_all_hypervisors()
        if not hypervisors:
            raise wexc.ClusterEmpty()
        overload_hosts = []
        nonoverload_hosts = []
        for hypervisor_id in hypervisors:
            hypervisor = self.model.get_hypervisor_from_id(hypervisor_id)
            resource_id = hypervisor.uuid
            airflow = self.ceilometer.statistic_aggregation(
                resource_id=resource_id,
                meter_name=self.meter_name_airflow,
                period=self._period,
                aggregate='avg')
            # some hosts may not have airflow meter, remove from target
            if airflow is None:
                LOG.warning(_LE("%s: no airflow data"), resource_id)
                continue

            LOG.debug("%s: airflow %f" % (resource_id, airflow))
            hvmap = {'hv': hypervisor, 'airflow': airflow}
            if airflow >= self.threshold_airflow:
                # mark the hypervisor to release resources
                overload_hosts.append(hvmap)
            else:
                nonoverload_hosts.append(hvmap)
        return overload_hosts, nonoverload_hosts
Example #20
File: base.py Project: Jean-Emile/watcher
    def obj_class_from_name(cls, objname, objver):
        """Returns a class from the registry based on a name and version."""
        if objname not in cls._obj_classes:
            LOG.error(_LE('Unable to instantiate unregistered object type '
                          '%(objtype)s'), dict(objtype=objname))
            raise exception.UnsupportedObjectError(objtype=objname)

        latest = None
        compatible_match = None
        for objclass in cls._obj_classes[objname]:
            if objclass.VERSION == objver:
                return objclass

            version_bits = tuple([int(x) for x in objclass.VERSION.split(".")])
            if latest is None:
                latest = version_bits
            elif latest < version_bits:
                latest = version_bits

            if versionutils.is_compatible(objver, objclass.VERSION):
                compatible_match = objclass

        if compatible_match:
            return compatible_match

        latest_ver = '%i.%i' % latest
        raise exception.IncompatibleObjectVersion(objname=objname,
                                                  objver=objver,
                                                  supported=latest_ver)
Example #21
def cleanup():
    global TRANSPORT, NOTIFICATION_TRANSPORT, NOTIFIER
    if NOTIFIER is None:
        LOG.exception(_LE("RPC cleanup: NOTIFIER is None"))
    TRANSPORT.cleanup()
    NOTIFICATION_TRANSPORT.cleanup()
    TRANSPORT = NOTIFICATION_TRANSPORT = NOTIFIER = None
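Example #21 logs when NOTIFIER was never initialised but still calls TRANSPORT.cleanup() unconditionally, which would raise AttributeError if setup never ran. The variant below guards each global first; it is a hardening sketch, an assumption rather than Watcher's actual code:

import logging

LOG = logging.getLogger(__name__)
TRANSPORT = NOTIFICATION_TRANSPORT = NOTIFIER = None


def cleanup():
    # guard each global so a half-initialised module can still shut down
    global TRANSPORT, NOTIFICATION_TRANSPORT, NOTIFIER
    for name, obj in (('TRANSPORT', TRANSPORT),
                      ('NOTIFICATION_TRANSPORT', NOTIFICATION_TRANSPORT)):
        if obj is None:
            LOG.error("RPC cleanup: %s is None", name)
        else:
            obj.cleanup()
    TRANSPORT = NOTIFICATION_TRANSPORT = NOTIFIER = None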
Example #22
File: sd.py Project: XroLLla/watcher
    def get_vm_load(self, vm_uuid):
        """
        Gather vm load through ceilometer statistic.

        :param vm_uuid: vm for which statistic is gathered.
        :return: dict
        """
        LOG.warning('get_vm_load started')
        flavor_id = self.nova.servers.get(vm_uuid).flavor['id']
        vm_vcpus = self.nova.flavors.get(flavor_id).vcpus
        vm_load = {'uuid': vm_uuid, 'vcpus': vm_vcpus}
        for meter in self.metrics:
            avg_meter = self.ceilometer.statistic_aggregation(
                resource_id=vm_uuid,
                meter_name=meter,
                period="120",
                aggregate='avg')
            if avg_meter is None:
                LOG.error(
                    _LE("No values returned by %(resource_id)s "
                        "for %(metric_name)s"),
                    {'resource_id': vm_uuid, 'metric_name': meter},
                )
            vm_load[meter] = avg_meter if avg_meter else 0
        return vm_load
Example #23
    def calculate_score_instance(self, instance):
        """Calculate Score of virtual machine

        :param instance: the virtual machine
        :return: score
        """
        instance_cpu_utilization = self.ceilometer. \
            statistic_aggregation(
                resource_id=instance.uuid,
                meter_name=self.INSTANCE_CPU_USAGE_METRIC_NAME,
                period="7200",
                aggregate='avg'
            )
        if instance_cpu_utilization is None:
            LOG.error(
                _LE("No values returned by %(resource_id)s "
                    "for %(metric_name)s"),
                {'resource_id': instance.uuid,
                 'metric_name': self.INSTANCE_CPU_USAGE_METRIC_NAME},
            )
            instance_cpu_utilization = 100

        cpu_capacity = self.compute_model.get_resource_from_id(
            element.ResourceType.cpu_cores).get_capacity(instance)

        total_cores_used = cpu_capacity * (instance_cpu_utilization / 100.0)

        return self.calculate_weight(instance, total_cores_used, 0, 0)
Example #24
    def group_hosts_by_outlet_temp(self, cluster_data_model):
        """Group hosts based on outlet temp meters"""

        hypervisors = cluster_data_model.get_all_hypervisors()
        size_cluster = len(hypervisors)
        if size_cluster == 0:
            raise wexc.ClusterEmpty()

        hosts_need_release = []
        hosts_target = []
        for hypervisor_id in hypervisors:
            hypervisor = cluster_data_model.get_hypervisor_from_id(
                hypervisor_id)
            resource_id = hypervisor.uuid

            outlet_temp = self.ceilometer.statistic_aggregation(
                resource_id=resource_id,
                meter_name=self._meter,
                period="30",
                aggregate='avg')
            # some hosts may not have outlet temp meters, remove from target
            if outlet_temp is None:
                LOG.warning(_LE("%s: no outlet temp data"), resource_id)
                continue

            LOG.debug("%s: outlet temperature %f" % (resource_id, outlet_temp))
            hvmap = {'hv': hypervisor, 'outlet_temp': outlet_temp}
            if outlet_temp >= self.threshold:
                # mark the hypervisor to release resources
                hosts_need_release.append(hvmap)
            else:
                hosts_target.append(hvmap)
        return hosts_need_release, hosts_target
Example #25
    def choose_vm_to_migrate(self, hosts):
        """pick up an active vm instance to migrate from provided hosts

        :param hosts: the array of dict which contains hypervisor object
        """
        vms_tobe_migrate = []
        for hvmap in hosts:
            source_hypervisor = hvmap['hv']
            source_vms = self.model.get_mapping().get_node_vms(
                source_hypervisor)
            if source_vms:
                inlet_t = self.ceilometer.statistic_aggregation(
                    resource_id=source_hypervisor.uuid,
                    meter_name=self.meter_name_inlet_t,
                    period=self._period,
                    aggregate='avg')
                power = self.ceilometer.statistic_aggregation(
                    resource_id=source_hypervisor.uuid,
                    meter_name=self.meter_name_power,
                    period=self._period,
                    aggregate='avg')
                if (power < self.threshold_power and
                        inlet_t < self.threshold_inlet_t):
                    # hardware issue, migrate all vms from this hypervisor
                    for vm_id in source_vms:
                        try:
                            vm = self.model.get_vm_from_id(vm_id)
                            vms_tobe_migrate.append(vm)
                        except wexc.InstanceNotFound:
                            LOG.error(_LE("VM not found Error: %s"), vm_id)
                    return source_hypervisor, vms_tobe_migrate
                else:
                    # migrate the first active vm
                    for vm_id in source_vms:
                        try:
                            vm = self.model.get_vm_from_id(vm_id)
                            if vm.state != vm_state.VMState.ACTIVE.value:
                                LOG.info(_LE("VM not active, skipped: %s"),
                                         vm.uuid)
                                continue
                            vms_tobe_migrate.append(vm)
                            return source_hypervisor, vms_tobe_migrate
                        except wexc.InstanceNotFound:
                            LOG.error(_LE("VM not found Error: %s"), vm_id)
            else:
                LOG.info(_LI("VM not found from hypervisor: %s"),
                         source_hypervisor.uuid)
Example #26
    def get_vm_utilization(self, vm_uuid, model, period=3600, aggr='avg'):
        """Collect cpu, ram and disk utilization statistics of a VM.

        :param vm_uuid: vm object
        :param model: model_root object
        :param period: seconds
        :param aggr: string
        :return: dict(cpu(number of vcpus used), ram(MB used), disk(B used))
        """
        if vm_uuid in self.ceilometer_vm_data_cache.keys():
            return self.ceilometer_vm_data_cache.get(vm_uuid)

        cpu_util_metric = 'cpu_util'
        ram_util_metric = 'memory.usage'

        ram_alloc_metric = 'memory'
        disk_alloc_metric = 'disk.root.size'
        vm_cpu_util = self.ceilometer.statistic_aggregation(
            resource_id=vm_uuid,
            meter_name=cpu_util_metric,
            period=period,
            aggregate=aggr)
        vm_cpu_cores = model.get_resource_from_id(
            resource.ResourceType.cpu_cores).get_capacity(
                model.get_vm_from_id(vm_uuid))

        if vm_cpu_util:
            total_cpu_utilization = vm_cpu_cores * (vm_cpu_util / 100.0)
        else:
            total_cpu_utilization = vm_cpu_cores

        vm_ram_util = self.ceilometer.statistic_aggregation(
            resource_id=vm_uuid,
            meter_name=ram_util_metric,
            period=period,
            aggregate=aggr)

        if not vm_ram_util:
            vm_ram_util = self.ceilometer.statistic_aggregation(
                resource_id=vm_uuid,
                meter_name=ram_alloc_metric,
                period=period,
                aggregate=aggr)

        vm_disk_util = self.ceilometer.statistic_aggregation(
            resource_id=vm_uuid,
            meter_name=disk_alloc_metric,
            period=period,
            aggregate=aggr)

        if not vm_ram_util or not vm_disk_util:
            LOG.error(_LE('No values returned by %(resource_id)s '
                          'for memory.usage or disk.root.size'),
                      {'resource_id': vm_uuid})
            raise exception.NoDataFound

        self.ceilometer_vm_data_cache[vm_uuid] = dict(
            cpu=total_cpu_utilization, ram=vm_ram_util, disk=vm_disk_util)
        return self.ceilometer_vm_data_cache.get(vm_uuid)
Example #27
File: base.py Project: akinsWin/watcher
 def setter(self, value, name=name, typefn=typefn):
     self._changed_fields.add(name)
     try:
         return setattr(self, get_attrname(name), typefn(value))
     except Exception:
         attr = "%s.%s" % (self.obj_name(), name)
         LOG.exception(_LE('Error setting %(attr)s'), {'attr': attr})
         raise
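Example #27 (and its duplicate, Example #31 below) is a generated setter: the field name and coercion function are bound as default arguments so each setter closes over its own values, and every assignment is recorded in _changed_fields. A stand-alone sketch of the same closure trick; the attribute naming here is illustrative, not oslo.versionedobjects':

def make_setter(name, typefn):
    def setter(self, value, name=name, typefn=typefn):
        self._changed_fields.add(name)
        try:
            setattr(self, '_obj_' + name, typefn(value))
        except Exception:
            print('Error setting %s.%s' % (type(self).__name__, name))
            raise
    return setter


class Obj(object):
    def __init__(self):
        self._changed_fields = set()

    count = property(lambda self: self._obj_count, make_setter('count', int))


o = Obj()
o.count = "5"
print(o.count, o._changed_fields)   # 5 {'count'}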
Example #28
File: service.py Project: icclab/watcher
    def stop(self):
        try:
            self.rpcserver.stop()
            self.rpcserver.wait()
        except Exception as e:
            LOG.exception(_LE('Service error occurred when stopping the '
                              'RPC server. Error: %s'), e)
        try:
            self.manager.del_host(deregister=self.deregister)
        except Exception as e:
            LOG.exception(_LE('Service error occurred when cleaning up '
                              'the RPC manager. Error: %s'), e)

        super(RPCService, self).stop(graceful=True)
        LOG.info(_LI('Stopped RPC server for service %(service)s on host '
                     '%(host)s.'),
                 {'service': self.topic, 'host': self.host})
Example #29
    def group_hosts_by_cpu_util(self):
        """Calculate the workloads of each node

        try to find out the nodes which have reached threshold
        and the nodes which are under threshold.
        and also calculate the average workload value of all nodes.
        and also generate the instance workload map.
        """

        nodes = self.compute_model.get_all_compute_nodes()
        cluster_size = len(nodes)
        if not nodes:
            raise wexc.ClusterEmpty()
        overload_hosts = []
        nonoverload_hosts = []
        # total workload of cluster
        cluster_workload = 0.0
        # use workload_cache to store the workload of VMs for reuse purpose
        workload_cache = {}
        for node_id in nodes:
            node = self.compute_model.get_node_by_uuid(node_id)
            instances = self.compute_model.get_node_instances(node)
            node_workload = 0.0
            for instance in instances:
                try:
                    cpu_util = self.ceilometer.statistic_aggregation(
                        resource_id=instance.uuid,
                        meter_name=self._meter,
                        period=self._period,
                        aggregate='avg')
                except Exception as exc:
                    LOG.exception(exc)
                    LOG.error(_LE("Can not get cpu_util from Ceilometer"))
                    continue
                if cpu_util is None:
                    LOG.debug("Instance (%s): cpu_util is None", instance.uuid)
                    continue
                workload_cache[instance.uuid] = cpu_util * instance.vcpus / 100
                node_workload += workload_cache[instance.uuid]
                LOG.debug("VM (%s): cpu_util %f", instance.uuid, cpu_util)
            node_cpu_util = node_workload / node.vcpus * 100

            cluster_workload += node_workload

            instance_data = {
                'node': node,
                "cpu_util": node_cpu_util,
                'workload': node_workload
            }
            if node_cpu_util >= self.threshold:
                # mark the node to release resources
                overload_hosts.append(instance_data)
            else:
                nonoverload_hosts.append(instance_data)

        avg_workload = cluster_workload / cluster_size

        return overload_hosts, nonoverload_hosts, avg_workload, workload_cache
Example #30
    def get_instance_utilization(self, instance, period=3600, aggr='avg'):
        """Collect cpu, ram and disk utilization statistics of a VM.

        :param instance: instance object
        :param period: seconds
        :param aggr: string
        :return: dict(cpu(number of vcpus used), ram(MB used), disk(B used))
        """
        if instance.uuid in self.ceilometer_instance_data_cache.keys():
            return self.ceilometer_instance_data_cache.get(instance.uuid)

        cpu_util_metric = 'cpu_util'
        ram_util_metric = 'memory.usage'

        ram_alloc_metric = 'memory'
        disk_alloc_metric = 'disk.root.size'
        instance_cpu_util = self.ceilometer.statistic_aggregation(
            resource_id=instance.uuid,
            meter_name=cpu_util_metric,
            period=period,
            aggregate=aggr)

        if instance_cpu_util:
            total_cpu_utilization = (instance.vcpus *
                                     (instance_cpu_util / 100.0))
        else:
            total_cpu_utilization = instance.vcpus

        instance_ram_util = self.ceilometer.statistic_aggregation(
            resource_id=instance.uuid,
            meter_name=ram_util_metric,
            period=period,
            aggregate=aggr)

        if not instance_ram_util:
            instance_ram_util = self.ceilometer.statistic_aggregation(
                resource_id=instance.uuid,
                meter_name=ram_alloc_metric,
                period=period,
                aggregate=aggr)

        instance_disk_util = self.ceilometer.statistic_aggregation(
            resource_id=instance.uuid,
            meter_name=disk_alloc_metric,
            period=period,
            aggregate=aggr)

        if not instance_ram_util or not instance_disk_util:
            LOG.error(
                _LE('No values returned by %s for memory.usage '
                    'or disk.root.size'), instance.uuid)
            raise exception.NoDataFound

        self.ceilometer_instance_data_cache[instance.uuid] = dict(
            cpu=total_cpu_utilization,
            ram=instance_ram_util,
            disk=instance_disk_util)
        return self.ceilometer_instance_data_cache.get(instance.uuid)
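The get_vm_utilization/get_instance_utilization helpers (Examples #26 and #30) memoise the per-instance statistics in a dict keyed by UUID so the Ceilometer queries run at most once per instance. The caching pattern in isolation; the fetch callable is a stand-in for the Ceilometer queries:

ceilometer_data_cache = {}


def get_utilization(uuid, fetch):
    if uuid in ceilometer_data_cache:
        return ceilometer_data_cache[uuid]
    ceilometer_data_cache[uuid] = fetch(uuid)
    return ceilometer_data_cache[uuid]


print(get_utilization('vm-1', lambda u: {'cpu': 1.5, 'ram': 512, 'disk': 10}))
print(get_utilization('vm-1', lambda u: None))   # served from the cache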
Example #31
File: base.py Project: Jean-Emile/watcher
 def setter(self, value, name=name, typefn=typefn):
     self._changed_fields.add(name)
     try:
         return setattr(self, get_attrname(name), typefn(value))
     except Exception:
         attr = "%s.%s" % (self.obj_name(), name)
         LOG.exception(_LE('Error setting %(attr)s'),
                       {'attr': attr})
         raise
Example #32
    def __call__(self, environ, start_response):
        # Request for this state, modified by replace_start_response()
        # and used when an error is being reported.
        state = {}

        def replacement_start_response(status, headers, exc_info=None):
            """Overrides the default response to make errors parsable."""
            try:
                status_code = int(status.split(' ')[0])
                state['status_code'] = status_code
            except (ValueError, TypeError):  # pragma: nocover
                raise Exception(_(
                    'ErrorDocumentMiddleware received an invalid '
                    'status %s') % status)
            else:
                if (state['status_code'] // 100) not in (2, 3):
                    # Remove some headers so we can replace them later
                    # when we have the full error message and can
                    # compute the length.
                    headers = [(h, v)
                               for (h, v) in headers
                               if h not in ('Content-Length', 'Content-Type')]
                # Save the headers in case we need to modify them.
                state['headers'] = headers
                return start_response(status, headers, exc_info)

        app_iter = self.app(environ, replacement_start_response)
        if (state['status_code'] // 100) not in (2, 3):
            req = webob.Request(environ)
            if (
                    req.accept.best_match(
                        ['application/json',
                         'application/xml']) == 'application/xml'
            ):
                try:
                    # simple check xml is valid
                    body = [
                        et.ElementTree.tostring(
                            et.ElementTree.Element(
                                'error_message', text='\n'.join(app_iter)))]
                except et.ElementTree.ParseError as err:
                    LOG.error(_LE('Error parsing HTTP response: %s'), err)
                    body = ['<error_message>%s'
                            '</error_message>' % state['status_code']]
                state['headers'].append(('Content-Type', 'application/xml'))
            else:
                if six.PY3:
                    app_iter = [i.decode('utf-8') for i in app_iter]
                body = [jsonutils.dumps(
                    {'error_message': '\n'.join(app_iter)})]
                if six.PY3:
                    body = [item.encode('utf-8') for item in body]
                state['headers'].append(('Content-Type', 'application/json'))
            state['headers'].append(('Content-Length', str(len(body[0]))))
        else:
            body = app_iter
        return body
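The middleware in Example #32 decides whether to rewrite the response body by parsing the numeric code out of the WSGI status line; only 2xx and 3xx responses pass through untouched. The check in isolation, with an invented status line:

status = '404 Not Found'
status_code = int(status.split(' ')[0])
needs_error_document = (status_code // 100) not in (2, 3)
print(status_code, needs_error_document)   # 404 True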
Example #33
    def get_instance_utilization(self, instance_uuid, model,
                                 period=3600, aggr='avg'):
        """Collect cpu, ram and disk utilization statistics of a VM.

        :param instance_uuid: instance object
        :param model: model_root object
        :param period: seconds
        :param aggr: string
        :return: dict(cpu(number of vcpus used), ram(MB used), disk(B used))
        """
        if instance_uuid in self.ceilometer_instance_data_cache.keys():
            return self.ceilometer_instance_data_cache.get(instance_uuid)

        cpu_util_metric = 'cpu_util'
        ram_util_metric = 'memory.usage'

        ram_alloc_metric = 'memory'
        disk_alloc_metric = 'disk.root.size'
        instance_cpu_util = self.ceilometer.statistic_aggregation(
            resource_id=instance_uuid, meter_name=cpu_util_metric,
            period=period, aggregate=aggr)
        instance_cpu_cores = model.get_resource_from_id(
            element.ResourceType.cpu_cores).get_capacity(
                model.get_instance_from_id(instance_uuid))

        if instance_cpu_util:
            total_cpu_utilization = (
                instance_cpu_cores * (instance_cpu_util / 100.0))
        else:
            total_cpu_utilization = instance_cpu_cores

        instance_ram_util = self.ceilometer.statistic_aggregation(
            resource_id=instance_uuid, meter_name=ram_util_metric,
            period=period, aggregate=aggr)

        if not instance_ram_util:
            instance_ram_util = self.ceilometer.statistic_aggregation(
                resource_id=instance_uuid, meter_name=ram_alloc_metric,
                period=period, aggregate=aggr)

        instance_disk_util = self.ceilometer.statistic_aggregation(
            resource_id=instance_uuid, meter_name=disk_alloc_metric,
            period=period, aggregate=aggr)

        if not instance_ram_util or not instance_disk_util:
            LOG.error(
                _LE('No values returned by %(resource_id)s '
                    'for memory.usage or disk.root.size'),
                {'resource_id': instance_uuid}
            )
            raise exception.NoDataFound

        self.ceilometer_instance_data_cache[instance_uuid] = dict(
            cpu=total_cpu_utilization, ram=instance_ram_util,
            disk=instance_disk_util)
        return self.ceilometer_instance_data_cache.get(instance_uuid)
Example #34
    def __call__(self, environ, start_response):
        # Request for this state, modified by replace_start_response()
        # and used when an error is being reported.
        state = {}

        def replacement_start_response(status, headers, exc_info=None):
            """Overrides the default response to make errors parsable."""
            try:
                status_code = int(status.split(' ')[0])
                state['status_code'] = status_code
            except (ValueError, TypeError):  # pragma: nocover
                raise Exception(_(
                    'ErrorDocumentMiddleware received an invalid '
                    'status %s') % status)
            else:
                if (state['status_code'] // 100) not in (2, 3):
                    # Remove some headers so we can replace them later
                    # when we have the full error message and can
                    # compute the length.
                    headers = [(h, v)
                               for (h, v) in headers
                               if h not in ('Content-Length', 'Content-Type')
                               ]
                # Save the headers in case we need to modify them.
                state['headers'] = headers
                return start_response(status, headers, exc_info)

        app_iter = self.app(environ, replacement_start_response)
        if (state['status_code'] // 100) not in (2, 3):
            req = webob.Request(environ)
            if (req.accept.best_match(['application/json', 'application/xml']
                                      ) == 'application/xml'):
                try:
                    # simple check xml is valid
                    body = [et.ElementTree.tostring(
                            et.ElementTree.Element('error_message',
                                                   text='\n'.join(app_iter)))]
                except et.ElementTree.ParseError as err:
                    LOG.error(_LE('Error parsing HTTP response: %s'), err)
                    body = [et.ElementTree.tostring(
                            et.ElementTree.Element('error_message',
                                                   text=state['status_code']))]
                state['headers'].append(('Content-Type', 'application/xml'))
            else:
                if six.PY3:
                    app_iter = [i.decode('utf-8') for i in app_iter]
                body = [jsonutils.dumps(
                        {'error_message': '\n'.join(app_iter)})]
                if six.PY3:
                    body = [item.encode('utf-8') for item in body]
                state['headers'].append(('Content-Type', 'application/json'))
            state['headers'].append(('Content-Length', str(len(body[0]))))
        else:
            body = app_iter
        return body
Example #35
File: sync.py Project: Oliverlyn/watcher
 def _soft_delete_removed_goals(self):
     removed_goals = [
         g for g in self.available_goals
         if g.name not in self.discovered_map['goals']]
     for removed_goal in removed_goals:
         removed_goal.soft_delete()
         filters = {"goal_id": removed_goal.id}
         invalid_ats = objects.AuditTemplate.list(self.ctx, filters=filters)
         for at in invalid_ats:
             LOG.warning(
                 _LE("Audit Template '%(audit_template)s' references a "
                     "goal that does not exist"),
                  {'audit_template': at.uuid})
Example #36
    def execute(self, *args, **kwargs):
        try:
            LOG.debug("Running action: %s", self.name)

            self.action.execute()
            self.engine.notify(self._db_action, objects.action.State.SUCCEEDED)
        except Exception as e:
            LOG.exception(e)
            LOG.error(_LE('The workflow engine has failed '
                          'to execute the action: %s'), self.name)

            self.engine.notify(self._db_action, objects.action.State.FAILED)
            raise
Example #37
    def execute(self, *args, **kwargs):
        try:
            LOG.debug("Running action: %s", self.name)

            self.action.execute()
            self.engine.notify(self._db_action,
                               obj_action.State.SUCCEEDED)
        except Exception as e:
            LOG.exception(e)
            LOG.error(_LE('The workflow engine has failed '
                          'to execute the action: %s'), self.name)

            self.engine.notify(self._db_action,
                               obj_action.State.FAILED)
            raise
Example #38
    def get_state_str(self, state):
        """Get resource state in string format.

        :param state: resource state of unknown type
        """
        if isinstance(state, six.string_types):
            return state
        elif isinstance(state, (element.InstanceState, element.ServiceState)):
            return state.value
        else:
            LOG.error(_LE('Unexpected resource state type, '
                          'state=%(state)s, state_type=%(st)s.'),
                      {'state': state, 'st': type(state)})
            raise exception.WatcherException
Example #39
    def get_state_str(self, state):
        """Get resource state in string format.

        :param state: resource state of unknown type
        """
        if isinstance(state, six.string_types):
            return state
        elif isinstance(state, (element.InstanceState, element.ServiceState)):
            return state.value
        else:
            LOG.error(
                _LE('Unexpected resource state type, '
                    'state=%(state)s, state_type=%(st)s.') %
                dict(state=state, st=type(state)))
            raise exception.WatcherException
    def get_state_str(self, state):
        """Get resource state in string format.

        :param state: resource state of unknown type
        """
        if isinstance(state, six.string_types):
            return state
        elif isinstance(state, (hyper_state.HypervisorState,
                                vm_state.VMState)):
            return state.value
        else:
            LOG.error(_LE('Unexpected resource state type, '
                          'state=%(state)s, state_type=%(st)s.'),
                      {'state': state, 'st': type(state)})
            raise exception.WatcherException
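
The three variants above implement the same pattern: accept either a plain string or an enum-like state object, normalize it to its string value, and raise on anything else. A minimal, self-contained sketch of that pattern (not Watcher's actual code; the enum below is hypothetical):

import enum
import logging

LOG = logging.getLogger(__name__)


class InstanceState(enum.Enum):
    # Hypothetical enum standing in for element.InstanceState
    ACTIVE = 'active'
    STOPPED = 'stopped'


def get_state_str(state):
    """Normalize a resource state to its string value."""
    if isinstance(state, str):
        return state
    if isinstance(state, enum.Enum):
        return state.value
    LOG.error('Unexpected resource state type, state=%(state)s, '
              'state_type=%(st)s.', {'state': state, 'st': type(state)})
    raise ValueError(state)


assert get_state_str('active') == 'active'
assert get_state_str(InstanceState.ACTIVE) == 'active'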
Example #41
 def _configure(self):
     try:
         self.__transport = om.get_transport(CONF)
         self.__notifier = self.build_notifier()
         if len(self.__endpoints):
             target = om.Target(
                 topic=self.topic_name,
                 # For compatibility, we can override it with 'host' opt
                 server=CONF.host or socket.getfqdn(),
                 version=self.__version,
             )
             self.__server = self.build_server(target)
         else:
             LOG.warning(_LW("No endpoint defined; can only publish events"))
     except Exception as e:
         LOG.exception(e)
         LOG.error(_LE("Messaging configuration error"))
Example #42
    def execute(self, *args, **kwargs):
        try:
            LOG.debug("Running action %s", self.name)

            # TODO(jed): stop returning True/False and raise an exception instead
            result = self.action.execute()
            if result is not True:
                self.engine.notify(self._db_action, obj_action.State.FAILED)
            else:
                self.engine.notify(self._db_action, obj_action.State.SUCCEEDED)
        except Exception as e:
            LOG.exception(e)
            LOG.error(
                _LE('The WorkFlow Engine has failed '
                    'to execute the action %s'), self.name)

            self.engine.notify(self._db_action, obj_action.State.FAILED)
            raise
Example #43
 def _configure(self):
     try:
         self.__transport = om.get_transport(CONF)
         self.__notifier = self.build_notifier()
         if len(self.__endpoints):
             target = om.Target(
                 topic=self.topic_name,
                 # For compatibility, we can override it with 'host' opt
                 server=CONF.host or socket.getfqdn(),
                 version=self.__version,
             )
             self.__server = self.build_server(target)
         else:
             LOG.warning(
                 _LW("No endpoint defined; can only publish events"))
     except Exception as e:
         LOG.exception(e)
         LOG.error(_LE("Messaging configuration error"))
    def calculate_score_instance(self, instance):
        """Calculate Score of virtual machine

        :param instance: the virtual machine
        :return: score
        """
        instance_cpu_utilization = self.get_instance_cpu_usage(instance)
        if instance_cpu_utilization is None:
            LOG.error(
                _LE("No values returned by %(resource_id)s "
                    "for %(metric_name)s") %
                dict(resource_id=instance.uuid,
                     metric_name=self.METRIC_NAMES[self.config.datasource]
                     ['instance_cpu_usage']))
            instance_cpu_utilization = 100

        total_cores_used = instance.vcpus * (instance_cpu_utilization / 100.0)

        return self.calculate_weight(instance, total_cores_used, 0, 0)
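
As a quick check of the arithmetic above (the numbers are made up): an instance with 4 vCPUs reporting 25% CPU utilization contributes 4 * (25 / 100.0) = 1.0 used core to the weight calculation, while an instance whose metric is missing is pessimistically treated as 100% busy:

# Hypothetical values, only to illustrate the formula used above.
vcpus = 4
instance_cpu_utilization = 25.0                 # percent from the datasource
total_cores_used = vcpus * (instance_cpu_utilization / 100.0)
assert total_cores_used == 1.0

# Missing metric: the method falls back to 100% utilization.
fallback_cores_used = vcpus * (100 / 100.0)
assert fallback_cores_used == 4.0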
    def add_migration(self, vm_uuid, src_hypervisor, dst_hypervisor, model):
        """Add an action for VM migration into the solution.

        :param vm_uuid: vm uuid
        :param src_hypervisor: hypervisor object
        :param dst_hypervisor: hypervisor object
        :param model: model_root object
        :return: None
        """
        vm = model.get_vm_from_id(vm_uuid)

        vm_state_str = self.get_state_str(vm.state)
        if vm_state_str != vm_state.VMState.ACTIVE.value:
            # Watcher currently only supports live VM migration and block
            # live VM migration, which both require the migrated VM to be
            # active. When supported, cold migration may be used as a
            # fallback mechanism to move non-active VMs.
            LOG.error(_LE('Cannot live migrate: vm_uuid=%(vm_uuid)s, '
                          'state=%(vm_state)s.'),
                      {'vm_uuid': vm_uuid,
                       'vm_state': vm_state_str})
            raise exception.WatcherException

        migration_type = 'live'

        dst_hyper_state_str = self.get_state_str(dst_hypervisor.state)
        if dst_hyper_state_str == hyper_state.HypervisorState.OFFLINE.value:
            self.add_action_activate_hypervisor(dst_hypervisor)
        model.get_mapping().unmap(src_hypervisor, vm)
        model.get_mapping().map(dst_hypervisor, vm)

        params = {
            'migration_type': migration_type,
            'src_hypervisor': src_hypervisor.uuid,
            'dst_hypervisor': dst_hypervisor.uuid
        }
        self.solution.add_action(action_type='migrate',
                                 resource_id=vm.uuid,
                                 input_parameters=params)
        self.number_of_migrations += 1
Example #46
File: default.py  Project: XroLLla/watcher
    def execute(self, *args, **kwargs):
        try:
            LOG.debug("Running action %s", self.name)

            # TODO(jed): stop returning True/False and raise an exception instead
            result = self.action.execute()
            if result is not True:
                self.engine.notify(self._db_action,
                                   obj_action.State.FAILED)
            else:
                self.engine.notify(self._db_action,
                                   obj_action.State.SUCCEEDED)
        except Exception as e:
            LOG.exception(e)
            LOG.error(_LE('The WorkFlow Engine has failed '
                          'to execute the action %s'), self.name)

            self.engine.notify(self._db_action,
                               obj_action.State.FAILED)
            raise
    def add_migration(self, vm_uuid, src_hypervisor,
                      dst_hypervisor, model):
        """Add an action for VM migration into the solution.

        :param vm_uuid: vm uuid
        :param src_hypervisor: hypervisor object
        :param dst_hypervisor: hypervisor object
        :param model: model_root object
        :return: None
        """
        vm = model.get_vm_from_id(vm_uuid)

        vm_state_str = self.get_state_str(vm.state)
        if vm_state_str != vm_state.VMState.ACTIVE.value:
            # Watcher currently only supports live VM migration and block
            # live VM migration, which both require the migrated VM to be
            # active. When supported, cold migration may be used as a
            # fallback mechanism to move non-active VMs.
            LOG.error(_LE('Cannot live migrate: vm_uuid=%(vm_uuid)s, '
                          'state=%(vm_state)s.'),
                      {'vm_uuid': vm_uuid,
                       'vm_state': vm_state_str})
            raise exception.WatcherException

        migration_type = 'live'

        dst_hyper_state_str = self.get_state_str(dst_hypervisor.state)
        if dst_hyper_state_str == hyper_state.HypervisorState.OFFLINE.value:
            self.add_action_activate_hypervisor(dst_hypervisor)
        model.get_mapping().unmap(src_hypervisor, vm)
        model.get_mapping().map(dst_hypervisor, vm)

        params = {'migration_type': migration_type,
                  'src_hypervisor': src_hypervisor.uuid,
                  'dst_hypervisor': dst_hypervisor.uuid}
        self.solution.add_action(action_type='migrate',
                                 resource_id=vm.uuid,
                                 input_parameters=params)
        self.number_of_migrations += 1
Example #48
    def choose_vm_to_migrate(self, hosts):
        """Pick up an active vm instance to migrate from provided hosts"""
        for hvmap in hosts:
            mig_src_hypervisor = hvmap['hv']
            vms_of_src = self.model.get_mapping().get_node_vms(
                mig_src_hypervisor)
            if len(vms_of_src) > 0:
                for vm_id in vms_of_src:
                    try:
                        # select the first active VM to migrate
                        vm = self.model.get_vm_from_id(vm_id)
                        if vm.state != vm_state.VMState.ACTIVE.value:
                            LOG.info(_LI("VM not active, skipped: %s"),
                                     vm.uuid)
                            continue
                        return mig_src_hypervisor, vm
                    except wexc.InstanceNotFound as e:
                        LOG.exception(e)
                        LOG.info(_LI("VM not found"))

        return None
Example #49
    def choose_instance_to_migrate(self, hosts, avg_workload, workload_cache):
        """Pick up an active instance instance to migrate from provided hosts

        :param hosts: the array of dict which contains node object
        :param avg_workload: the average workload value of all nodes
        :param workload_cache: the map contains instance to workload mapping
        """
        for instance_data in hosts:
            source_node = instance_data['node']
            source_instances = self.compute_model.mapping.get_node_instances(
                source_node)
            if source_instances:
                delta_workload = instance_data['workload'] - avg_workload
                min_delta = 1000000
                instance_id = None
                for inst_id in source_instances:
                    try:
                        # select the first active VM to migrate
                        instance = self.compute_model.get_instance_from_id(
                            inst_id)
                        if (instance.state !=
                                element.InstanceState.ACTIVE.value):
                            LOG.debug("Instance not active, skipped: %s",
                                      instance.uuid)
                            continue
                        current_delta = (
                            delta_workload - workload_cache[inst_id])
                        if 0 <= current_delta < min_delta:
                            min_delta = current_delta
                            instance_id = inst_id
                    except wexc.InstanceNotFound:
                        LOG.error(_LE("Instance not found; error: %s"),
                                  instance_id)
                if instance_id:
                    return (source_node,
                            self.compute_model.get_instance_from_id(
                                instance_id))
            else:
                LOG.info(_LI("VM not found from node: %s"),
                         source_node.uuid)
    def add_migration(self, instance_uuid, source_node,
                      destination_node, model):
        """Add an action for VM migration into the solution.

        :param instance_uuid: instance uuid
        :param source_node: node object
        :param destination_node: node object
        :param model: model_root object
        :return: None
        """
        instance = model.get_instance_from_id(instance_uuid)

        instance_state_str = self.get_state_str(instance.state)
        if instance_state_str != element.InstanceState.ACTIVE.value:
            # Watcher currently only supports live VM migration and block
            # live VM migration, which both require the migrated VM to be
            # active. When supported, cold migration may be used as a
            # fallback mechanism to move non-active VMs.
            LOG.error(
                _LE('Cannot live migrate: instance_uuid=%(instance_uuid)s, '
                    'state=%(instance_state)s.'),
                {'instance_uuid': instance_uuid,
                 'instance_state': instance_state_str})
            raise exception.WatcherException

        migration_type = 'live'

        destination_node_state_str = self.get_state_str(destination_node.state)
        if destination_node_state_str == element.ServiceState.DISABLED.value:
            self.add_action_enable_compute_node(destination_node)
        model.mapping.unmap(source_node, instance)
        model.mapping.map(destination_node, instance)

        params = {'migration_type': migration_type,
                  'source_node': source_node.uuid,
                  'destination_node': destination_node.uuid}
        self.solution.add_action(action_type='migrate',
                                 resource_id=instance.uuid,
                                 input_parameters=params)
        self.number_of_migrations += 1
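
For context, a rough sketch of the 'migrate' action this method appends to the solution, which a downstream action plan can then execute. The UUID values below are illustrative placeholders, not taken from Watcher:

# Illustrative only: the shape of the action appended to the solution.
migrate_action = {
    'action_type': 'migrate',
    'resource_id': 'instance-uuid',              # hypothetical instance UUID
    'input_parameters': {
        'migration_type': 'live',
        'source_node': 'compute-1-uuid',         # hypothetical node UUID
        'destination_node': 'compute-2-uuid',    # hypothetical node UUID
    },
}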
Example #51
    def choose_instance_to_migrate(self, hosts, avg_workload, workload_cache):
        """Pick up an active instance instance to migrate from provided hosts

        :param hosts: the array of dict which contains node object
        :param avg_workload: the average workload value of all nodes
        :param workload_cache: the map contains instance to workload mapping
        """
        for instance_data in hosts:
            source_node = instance_data['node']
            source_instances = self.compute_model.mapping.get_node_instances(
                source_node)
            if source_instances:
                delta_workload = instance_data['workload'] - avg_workload
                min_delta = 1000000
                instance_id = None
                for inst_id in source_instances:
                    try:
                        # select the first active VM to migrate
                        instance = self.compute_model.get_instance_by_uuid(
                            inst_id)
                        if (instance.state !=
                                element.InstanceState.ACTIVE.value):
                            LOG.debug("Instance not active, skipped: %s",
                                      instance.uuid)
                            continue
                        current_delta = (delta_workload -
                                         workload_cache[inst_id])
                        if 0 <= current_delta < min_delta:
                            min_delta = current_delta
                            instance_id = inst_id
                    except wexc.InstanceNotFound:
                        LOG.error(_LE("Instance not found; error: %s"),
                                  instance_id)
                if instance_id:
                    return (
                        source_node,
                        self.compute_model.get_instance_by_uuid(instance_id))
            else:
                LOG.info(_LI("VM not found from node: %s"), source_node.uuid)
Example #52
    def choose_vm_to_migrate(self, cluster_data_model, hosts):
        """pick up an active vm instance to migrate from provided hosts"""

        for hvmap in hosts:
            mig_src_hypervisor = hvmap['hv']
            vms_of_src = cluster_data_model.get_mapping().get_node_vms(
                mig_src_hypervisor)
            if len(vms_of_src) > 0:
                for vm_id in vms_of_src:
                    try:
                        # select the first active VM to migrate
                        vm = cluster_data_model.get_vm_from_id(vm_id)
                        if vm.state != vm_state.VMState.ACTIVE.value:
                            LOG.info(_LI("VM not active, skipped: %s"),
                                     vm.uuid)
                            continue
                        return mig_src_hypervisor, vm
                    except wexc.InstanceNotFound as e:
                        LOG.info(_LI("VM not found, error: %s"), e)

        return None
    def calculate_score_node(self, node):
        """Calculate the score that represent the utilization level

        :param node: :py:class:`~.ComputeNode` instance
        :return: Score for the given compute node
        :rtype: float
        """
        host_avg_cpu_util = self.get_node_cpu_usage(node)

        if host_avg_cpu_util is None:
            resource_id = "%s_%s" % (node.uuid, node.hostname)
            LOG.error(
                _LE("No values returned by %(resource_id)s "
                    "for %(metric_name)s") %
                dict(resource_id=resource_id,
                     metric_name=self.METRIC_NAMES[self.config.datasource]
                     ['host_cpu_usage']))
            host_avg_cpu_util = 100

        total_cores_used = node.vcpus * (host_avg_cpu_util / 100.0)

        return self.calculate_weight(node, total_cores_used, 0, 0)
    def add_migration(self, instance, source_node, destination_node):
        """Add an action for VM migration into the solution.

        :param instance: instance object
        :param source_node: node object
        :param destination_node: node object
        :return: None
        """
        instance_state_str = self.get_state_str(instance.state)
        if instance_state_str != element.InstanceState.ACTIVE.value:
            # Watcher currently only supports live VM migration and block
            # live VM migration, which both require the migrated VM to be
            # active. When supported, cold migration may be used as a
            # fallback mechanism to move non-active VMs.
            LOG.error(
                _LE('Cannot live migrate: instance_uuid=%(instance_uuid)s, '
                    'state=%(instance_state)s.') %
                dict(instance_uuid=instance.uuid,
                     instance_state=instance_state_str))
            return

        migration_type = 'live'

        destination_node_state_str = self.get_state_str(destination_node.state)
        if destination_node_state_str == element.ServiceState.DISABLED.value:
            self.add_action_enable_compute_node(destination_node)

        if self.compute_model.migrate_instance(instance, source_node,
                                               destination_node):
            params = {
                'migration_type': migration_type,
                'source_node': source_node.uuid,
                'destination_node': destination_node.uuid
            }
            self.solution.add_action(action_type='migrate',
                                     resource_id=instance.uuid,
                                     input_parameters=params)
            self.number_of_migrations += 1
Example #55
    def group_hosts_by_cpu_util(self):
        """Calculate the workloads of each node

        try to find out the nodes which have reached threshold
        and the nodes which are under threshold.
        and also calculate the average workload value of all nodes.
        and also generate the instance workload map.
        """

        nodes = self.compute_model.get_all_compute_nodes()
        cluster_size = len(nodes)
        if not nodes:
            raise wexc.ClusterEmpty()
        # get cpu cores capacity of nodes and instances
        cap_cores = self.compute_model.get_resource_from_id(
            element.ResourceType.cpu_cores)
        overload_hosts = []
        nonoverload_hosts = []
        # total workload of cluster
        # it's the total core numbers being utilized in a cluster.
        cluster_workload = 0.0
        # use workload_cache to store the workload of VMs for reuse purpose
        workload_cache = {}
        for node_id in nodes:
            node = self.compute_model.get_node_from_id(
                node_id)
            instances = self.compute_model.mapping.get_node_instances(node)
            node_workload = 0.0
            for instance_id in instances:
                instance = self.compute_model.get_instance_from_id(instance_id)
                try:
                    cpu_util = self.ceilometer.statistic_aggregation(
                        resource_id=instance_id,
                        meter_name=self._meter,
                        period=self._period,
                        aggregate='avg')
                except Exception as exc:
                    LOG.exception(exc)
                    LOG.error(_LE("Can not get cpu_util from Ceilometer"))
                    continue
                if cpu_util is None:
                    LOG.debug("Instance (%s): cpu_util is None", instance_id)
                    continue
                instance_cores = cap_cores.get_capacity(instance)
                workload_cache[instance_id] = cpu_util * instance_cores / 100
                node_workload += workload_cache[instance_id]
                LOG.debug("VM (%s): cpu_util %f", instance_id, cpu_util)
            node_cores = cap_cores.get_capacity(node)
            hy_cpu_util = node_workload / node_cores * 100

            cluster_workload += node_workload

            instance_data = {
                'node': node, "cpu_util": hy_cpu_util,
                'workload': node_workload}
            if hy_cpu_util >= self.threshold:
                # mark the node to release resources
                overload_hosts.append(instance_data)
            else:
                nonoverload_hosts.append(instance_data)

        avg_workload = cluster_workload / cluster_size

        return overload_hosts, nonoverload_hosts, avg_workload, workload_cache
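
To make the bookkeeping concrete, a hedged worked example with two hypothetical 8-core nodes (all numbers invented): per-instance workload is cpu_util * cores / 100, node utilization is node_workload / node_cores * 100, and the average workload divides the cluster total by the node count:

# Hypothetical two-node cluster illustrating the arithmetic above.
# Node A: 8 cores, one instance with 4 cores at 80% CPU utilization.
# Node B: 8 cores, one instance with 2 cores at 20% CPU utilization.
workload_a = 80 * 4 / 100.0                   # 3.2 cores in use
workload_b = 20 * 2 / 100.0                   # 0.4 cores in use

util_a = workload_a / 8 * 100                 # 40.0% node CPU utilization
util_b = workload_b / 8 * 100                 # 5.0%

threshold = 30.0
avg_workload = (workload_a + workload_b) / 2  # 1.8 cores per node on average
overloaded = [name for name, util in [('A', util_a), ('B', util_b)]
              if util >= threshold]
assert overloaded == ['A']
assert abs(avg_workload - 1.8) < 1e-9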