def group_hosts_by_outlet_temp(self):
    """Group hosts based on outlet temp meters"""
    nodes = self.compute_model.get_all_compute_nodes()
    size_cluster = len(nodes)
    if size_cluster == 0:
        raise wexc.ClusterEmpty()

    hosts_need_release = []
    hosts_target = []
    for node in nodes.values():
        resource_id = node.uuid
        outlet_temp = self.ceilometer.statistic_aggregation(
            resource_id=resource_id,
            meter_name=self._meter,
            period="30",
            aggregate='avg')
        # some hosts may not have outlet temp meters, remove from target
        if outlet_temp is None:
            LOG.warning(_LW("%s: no outlet temp data"), resource_id)
            continue

        LOG.debug("%s: outlet temperature %f", resource_id, outlet_temp)
        instance_data = {'node': node, 'outlet_temp': outlet_temp}
        if outlet_temp >= self.threshold:
            # mark the node to release resources
            hosts_need_release.append(instance_data)
        else:
            hosts_target.append(instance_data)
    return hosts_need_release, hosts_target
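
# A minimal consumer sketch for the pair returned above (hypothetical
# helper, not part of the strategy): pair each hot host with the coolest
# candidate destination. Entries are the {'node': ..., 'outlet_temp': ...}
# dicts built in group_hosts_by_outlet_temp().
def pick_migration_pairs(hosts_need_release, hosts_target):
    """Pair each hot host with the coolest candidate destination."""
    targets = sorted(hosts_target, key=lambda h: h['outlet_temp'])
    pairs = []
    for hot in sorted(hosts_need_release,
                      key=lambda h: h['outlet_temp'], reverse=True):
        if not targets:
            break
        # naive choice: always the coolest target; a real strategy would
        # also check capacity before picking a destination
        pairs.append((hot['node'], targets[0]['node']))
    return pairs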

def group_hosts_by_airflow(self):
    """Group hosts based on airflow meters"""
    nodes = self.compute_model.get_all_compute_nodes()
    if not nodes:
        raise wexc.ClusterEmpty()

    overload_hosts = []
    nonoverload_hosts = []
    for node_id in nodes:
        node = self.compute_model.get_node_by_uuid(node_id)
        resource_id = node.uuid
        airflow = self.ceilometer.statistic_aggregation(
            resource_id=resource_id,
            meter_name=self.meter_name_airflow,
            period=self._period,
            aggregate='avg')
        # some hosts may not have airflow meters, remove from target
        if airflow is None:
            LOG.warning("%s: no airflow data", resource_id)
            continue

        LOG.debug("%s: airflow %f", resource_id, airflow)
        nodemap = {'node': node, 'airflow': airflow}
        if airflow >= self.threshold_airflow:
            # mark the node to release resources
            overload_hosts.append(nodemap)
        else:
            nonoverload_hosts.append(nodemap)
    return overload_hosts, nonoverload_hosts

def group_hosts_by_outlet_temp(self, cluster_data_model):
    """Group hosts based on outlet temp meters"""
    hypervisors = cluster_data_model.get_all_hypervisors()
    size_cluster = len(hypervisors)
    if size_cluster == 0:
        raise wexc.ClusterEmpty()

    hosts_need_release = []
    hosts_target = []
    for hypervisor_id in hypervisors:
        hypervisor = cluster_data_model.get_hypervisor_from_id(
            hypervisor_id)
        resource_id = hypervisor.uuid
        outlet_temp = self.ceilometer.statistic_aggregation(
            resource_id=resource_id,
            meter_name=self._meter,
            period="30",
            aggregate='avg')
        # some hosts may not have outlet temp meters, remove from target
        if outlet_temp is None:
            LOG.warning(_LW("%s: no outlet temp data"), resource_id)
            continue

        LOG.debug("%s: outlet temperature %f", resource_id, outlet_temp)
        hvmap = {'hv': hypervisor, 'outlet_temp': outlet_temp}
        if outlet_temp >= self.threshold:
            # mark the hypervisor to release resources
            hosts_need_release.append(hvmap)
        else:
            hosts_target.append(hvmap)
    return hosts_need_release, hosts_target

def group_hosts_by_cpu_util(self):
    """Calculate the workload of each node.

    Identify the nodes that have reached the threshold and the nodes
    that are still below it, compute the average workload across all
    nodes, and build the instance workload map.
    """
    nodes = self.compute_model.get_all_compute_nodes()
    cluster_size = len(nodes)
    if not nodes:
        raise wexc.ClusterEmpty()

    overload_hosts = []
    nonoverload_hosts = []
    # total workload of the cluster
    cluster_workload = 0.0
    # use workload_cache to store the workload of VMs for reuse
    workload_cache = {}
    for node_id in nodes:
        node = self.compute_model.get_node_by_uuid(node_id)
        instances = self.compute_model.get_node_instances(node)
        node_workload = 0.0
        for instance in instances:
            try:
                cpu_util = self.ceilometer.statistic_aggregation(
                    resource_id=instance.uuid,
                    meter_name=self._meter,
                    period=self._period,
                    aggregate='avg')
            except Exception as exc:
                LOG.exception(exc)
                LOG.error(_LE("Cannot get cpu_util from Ceilometer"))
                continue
            if cpu_util is None:
                LOG.debug("Instance (%s): cpu_util is None",
                          instance.uuid)
                continue
            workload_cache[instance.uuid] = cpu_util * instance.vcpus / 100
            node_workload += workload_cache[instance.uuid]
            LOG.debug("VM (%s): cpu_util %f", instance.uuid, cpu_util)

        node_cpu_util = node_workload / node.vcpus * 100
        cluster_workload += node_workload
        instance_data = {
            'node': node, "cpu_util": node_cpu_util,
            'workload': node_workload}
        if node_cpu_util >= self.threshold:
            # mark the node to release resources
            overload_hosts.append(instance_data)
        else:
            nonoverload_hosts.append(instance_data)

    avg_workload = cluster_workload / cluster_size
    return overload_hosts, nonoverload_hosts, avg_workload, workload_cache
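
# A tiny self-check of the workload arithmetic above, with illustrative
# numbers (not taken from any real deployment):
vm_cpu_util, vm_vcpus = 50.0, 4
vm_workload = vm_cpu_util * vm_vcpus / 100        # 2.0 "busy" vCPUs
node_vcpus, node_workload = 16, 8.0
node_cpu_util = node_workload / node_vcpus * 100  # 50.0 % of the node
assert (vm_workload, node_cpu_util) == (2.0, 50.0)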

def pre_execute(self):
    LOG.info(_LI("Initializing Server Consolidation"))
    if not self.compute_model:
        raise exception.ClusterStateNotDefined()
    if len(self.compute_model.get_all_compute_nodes()) == 0:
        raise exception.ClusterEmpty()
    LOG.debug(self.compute_model.to_string())

def group_hosts_by_outlet_temp(self):
    """Group hosts based on outlet temp meters"""
    nodes = self.compute_model.get_all_compute_nodes()
    size_cluster = len(nodes)
    if size_cluster == 0:
        raise wexc.ClusterEmpty()

    hosts_need_release = []
    hosts_target = []
    metric_name = self.METRIC_NAMES[
        self.config.datasource]['host_outlet_temp']
    for node in nodes.values():
        resource_id = node.uuid
        outlet_temp = None
        if self.config.datasource == "ceilometer":
            outlet_temp = self.ceilometer.statistic_aggregation(
                resource_id=resource_id,
                meter_name=metric_name,
                period=self.period,
                aggregate='avg')
        elif self.config.datasource == "gnocchi":
            stop_time = datetime.datetime.utcnow()
            start_time = stop_time - datetime.timedelta(
                seconds=int(self.period))
            outlet_temp = self.gnocchi.statistic_aggregation(
                resource_id=resource_id,
                metric=metric_name,
                granularity=self.granularity,
                start_time=start_time,
                stop_time=stop_time,
                aggregation='mean')
        # some hosts may not have outlet temp meters, remove from target
        if outlet_temp is None:
            LOG.warning("%s: no outlet temp data", resource_id)
            continue

        LOG.debug("%s: outlet temperature %f", resource_id, outlet_temp)
        instance_data = {'node': node, 'outlet_temp': outlet_temp}
        if outlet_temp >= self.threshold:
            # mark the node to release resources
            hosts_need_release.append(instance_data)
        else:
            hosts_target.append(instance_data)
    return hosts_need_release, hosts_target

def group_hosts_by_airflow(self):
    """Group hosts based on airflow meters"""
    nodes = self.compute_model.get_all_compute_nodes()
    if not nodes:
        raise wexc.ClusterEmpty()

    overload_hosts = []
    nonoverload_hosts = []
    for node_id in nodes:
        node = self.compute_model.get_node_by_uuid(node_id)
        resource_id = node.uuid
        airflow = None
        if self.config.datasource == "ceilometer":
            airflow = self.ceilometer.statistic_aggregation(
                resource_id=resource_id,
                meter_name=self.meter_name_airflow,
                period=self._period,
                aggregate='avg')
        elif self.config.datasource == "gnocchi":
            stop_time = datetime.datetime.utcnow()
            start_time = stop_time - datetime.timedelta(
                seconds=int(self._period))
            airflow = self.gnocchi.statistic_aggregation(
                resource_id=resource_id,
                metric=self.meter_name_airflow,
                granularity=self.granularity,
                start_time=start_time,
                stop_time=stop_time,
                aggregation='mean')
        # some hosts may not have airflow meters, remove from target
        if airflow is None:
            LOG.warning("%s: no airflow data", resource_id)
            continue

        LOG.debug("%s: airflow %f", resource_id, airflow)
        nodemap = {'node': node, 'airflow': airflow}
        if airflow >= self.threshold_airflow:
            # mark the node to release resources
            overload_hosts.append(nodemap)
        else:
            nonoverload_hosts.append(nodemap)
    return overload_hosts, nonoverload_hosts
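
# The ceilometer/gnocchi branching above is repeated in every grouping
# method. A minimal sketch of pulling it into one helper (the name
# _get_host_metric is hypothetical; the revision in the next snippet
# instead hides the choice behind self.datasource_backend):
def _get_host_metric(self, resource_id, metric_name):
    """Return the averaged metric for a host from the active datasource."""
    if self.config.datasource == "ceilometer":
        return self.ceilometer.statistic_aggregation(
            resource_id=resource_id,
            meter_name=metric_name,
            period=self._period,
            aggregate='avg')
    elif self.config.datasource == "gnocchi":
        stop_time = datetime.datetime.utcnow()
        start_time = stop_time - datetime.timedelta(
            seconds=int(self._period))
        return self.gnocchi.statistic_aggregation(
            resource_id=resource_id,
            metric=metric_name,
            granularity=self.granularity,
            start_time=start_time,
            stop_time=stop_time,
            aggregation='mean')
    return None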

def group_hosts_by_outlet_temp(self):
    """Group hosts based on outlet temp meters"""
    nodes = self.get_available_compute_nodes()
    size_cluster = len(nodes)
    if size_cluster == 0:
        raise wexc.ClusterEmpty()

    hosts_need_release = []
    hosts_target = []
    metric_name = self.METRIC_NAMES[
        self.config.datasource]['host_outlet_temp']
    for node in nodes.values():
        resource_id = node.uuid
        outlet_temp = self.datasource_backend.statistic_aggregation(
            resource_id=resource_id,
            meter_name=metric_name,
            period=self.period,
            granularity=self.granularity)
        # some hosts may not have outlet temp meters, remove from target
        if outlet_temp is None:
            LOG.warning("%s: no outlet temp data", resource_id)
            continue

        LOG.debug("%(resource)s: outlet temperature %(temp)f",
                  {'resource': resource_id, 'temp': outlet_temp})
        instance_data = {'node': node, 'outlet_temp': outlet_temp}
        if outlet_temp >= self.threshold:
            # mark the node to release resources
            hosts_need_release.append(instance_data)
        else:
            hosts_target.append(instance_data)
    return hosts_need_release, hosts_target

def execute(self, original_model):
    LOG.info(_LI("Initializing Sercon Consolidation"))

    if original_model is None:
        raise exception.ClusterStateNotDefined()

    # TODO(jed): clone the model
    current_model = original_model
    self.efficacy = 100
    unsuccessful_migration = 0
    first_migration = True
    size_cluster = len(current_model.get_all_hypervisors())
    if size_cluster == 0:
        raise exception.ClusterEmpty()

    self.compute_attempts(size_cluster)

    for hypervisor_id in current_model.get_all_hypervisors():
        hypervisor = current_model.get_hypervisor_from_id(hypervisor_id)
        count = current_model.get_mapping().get_node_vms_from_id(
            hypervisor_id)
        if len(count) == 0:
            if hypervisor.state == hyper_state.HypervisorState.ONLINE:
                self.add_change_service_state(
                    hypervisor_id,
                    hyper_state.HypervisorState.OFFLINE.value)

    while self.get_allowed_migration_attempts() >= unsuccessful_migration:
        if not first_migration:
            self.efficacy = self.calculate_migration_efficacy()
            if self.efficacy < float(self.target_efficacy):
                break
        first_migration = False

        score = self.score_of_nodes(current_model, [])

        # sort compute nodes by score, decreasing
        sorted_score = sorted(score, reverse=True, key=lambda x: (x[1]))
        LOG.debug("Hypervisor(s) BFD %s", sorted_score)

        # get the node to be released
        if len(score) == 0:
            LOG.warning(_LW("The workloads of the compute nodes"
                            " of the cluster are zero"))
            break

        node_to_release, vm_score = self.node_and_vm_score(
            sorted_score, score, current_model)

        # sort VMs by score (BFD: Best Fit Decreasing)
        sorted_vms = sorted(vm_score, reverse=True, key=lambda x: (x[1]))
        LOG.debug("VM(s) BFD %s", sorted_vms)

        migrations = self.calculate_num_migrations(
            sorted_vms, current_model, node_to_release, sorted_score)

        unsuccessful_migration = self.unsuccessful_migration_actualization(
            migrations, unsuccessful_migration)

    infos = {
        "number_of_migrations": self.number_of_migrations,
        "number_of_nodes_released": self.number_of_released_nodes,
        "efficacy": self.efficacy
    }
    LOG.debug(infos)
    self.solution.model = current_model
    self.solution.efficacy = self.efficacy
    return self.solution

def group_hosts(self):
    nodes = self.compute_model.get_all_compute_nodes()
    size_cluster = len(nodes)
    if size_cluster == 0:
        raise wexc.ClusterEmpty()

    hosts_need_release = {}
    hosts_target = []
    for node in nodes.values():
        instances_of_node = self.compute_model.get_node_instances(node)
        node_instance_count = len(instances_of_node)

        # Flag that tells us whether to skip the node. It becomes True
        # once we find a noisy instance, or once the potential priority
        # instance is the same as the potential noisy instance.
        loop_break_flag = False

        if node_instance_count > 1:
            instance_priority_list = []
            for instance in instances_of_node:
                instance_priority_list.append(instance)

            # If an instance has no watcher-priority metadata, fall back
            # to DEFAULT_WATCHER_PRIORITY.
            instance_priority_list.sort(
                key=lambda a: a.get('metadata').get(
                    'watcher-priority', self.DEFAULT_WATCHER_PRIORITY))

            instance_priority_list_reverse = list(instance_priority_list)
            instance_priority_list_reverse.reverse()

            for potential_priority_instance in instance_priority_list:
                priority_instance = self.find_priority_instance(
                    potential_priority_instance)
                if priority_instance is not None:
                    for potential_noisy_instance in (
                            instance_priority_list_reverse):
                        if (potential_noisy_instance ==
                                potential_priority_instance):
                            loop_break_flag = True
                            break

                        noisy_instance = self.find_noisy_instance(
                            potential_noisy_instance)
                        if noisy_instance is not None:
                            hosts_need_release[node.uuid] = {
                                'priority_vm': potential_priority_instance,
                                'noisy_vm': potential_noisy_instance}
                            LOG.debug("Priority VM found: %s",
                                      potential_priority_instance.uuid)
                            LOG.debug("Noisy VM found: %s",
                                      potential_noisy_instance.uuid)
                            loop_break_flag = True
                            break

                # No need to check other instances in the node
                if loop_break_flag is True:
                    break

        if node.uuid not in hosts_need_release:
            hosts_target.append(node)
    return hosts_need_release, hosts_target
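
# A standalone sketch of the fixed priority sort above, with plain dicts
# standing in for instance objects; the int() cast is an assumption to
# keep string metadata values comparable with the numeric default:
DEFAULT_WATCHER_PRIORITY = 5  # hypothetical default value

def sort_by_watcher_priority(instances):
    """Order instances by watcher-priority metadata, lowest value first."""
    return sorted(
        instances,
        key=lambda a: int(a.get('metadata').get(
            'watcher-priority', DEFAULT_WATCHER_PRIORITY)))

# e.g. sort_by_watcher_priority([
#     {'metadata': {'watcher-priority': '2'}},
#     {'metadata': {}},                           # falls back to 5
#     {'metadata': {'watcher-priority': '1'}}])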

def group_hosts_by_cpu_or_ram_util(self):
    """Calculate the workload of each node.

    Identify the nodes that have reached the threshold and the nodes
    that are still below it, compute the average workload across all
    nodes, and build the instance workload map.
    """
    nodes = self.get_available_compute_nodes()
    cluster_size = len(nodes)
    if not nodes:
        raise wexc.ClusterEmpty()

    overload_hosts = []
    nonoverload_hosts = []
    # total workload of the cluster
    cluster_workload = 0.0
    # use workload_cache to store the workload of VMs for reuse
    workload_cache = {}
    for node_id in nodes:
        node = self.compute_model.get_node_by_uuid(node_id)
        instances = self.compute_model.get_node_instances(node)
        node_workload = 0.0
        for instance in instances:
            util = None
            try:
                util = self.datasource_backend.statistic_aggregation(
                    instance.uuid, self._meter, self._period,
                    self._granularity, aggregation='mean',
                    dimensions=dict(resource_id=instance.uuid))
            except Exception as exc:
                LOG.exception(exc)
                LOG.error("Cannot get %s from %s", self._meter,
                          self.config.datasource)
                continue
            if util is None:
                LOG.debug("Instance (%s): %s is None",
                          instance.uuid, self._meter)
                continue
            if self._meter == self.CPU_METER_NAME:
                workload_cache[instance.uuid] = (util *
                                                 instance.vcpus / 100)
            else:
                workload_cache[instance.uuid] = util
            node_workload += workload_cache[instance.uuid]
            LOG.debug("VM (%s): %s %f", instance.uuid, self._meter, util)

        cluster_workload += node_workload

        if self._meter == self.CPU_METER_NAME:
            node_util = node_workload / node.vcpus * 100
        else:
            node_util = node_workload / node.memory * 100

        instance_data = {
            'node': node, self._meter: node_util,
            'workload': node_workload}
        if node_util >= self.threshold:
            # mark the node to release resources
            overload_hosts.append(instance_data)
        else:
            nonoverload_hosts.append(instance_data)

    avg_workload = cluster_workload / cluster_size
    return overload_hosts, nonoverload_hosts, avg_workload, workload_cache
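
# A minimal consumer sketch for the tuple returned above (hypothetical
# helper, not the strategy's actual algorithm): how much workload each
# overloaded node would have to shed to come back to the cluster average.
def capacity_to_release(overload_hosts, avg_workload):
    """Map node uuid -> workload to shed to reach the cluster average."""
    return {
        entry['node'].uuid: entry['workload'] - avg_workload
        for entry in overload_hosts
        if entry['workload'] > avg_workload
    }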