def init_state():
    """Collect the local host's static characteristics and persist them.

    Reads the hostname, total CPU MHz, RAM and physical CPU count through a
    read-only libvirt connection, derives the CPU capacity usable by VMs and
    the overload threshold from ``CONF``, and stores the resulting record
    through ``hosts.API().create_host_init_data``.

    Returns:
        None.

    Raises:
        webob.exc.HTTPBadRequest: if ``create_host_init_data`` raises
            ``exception.HostInitDataNotFound``.
    """
    # Design note (translated from the original): this could run as part of
    # the xdrs-host initialisation and populate the HostInitData table.
    # Database load is a known bottleneck: depending on cluster size and the
    # number of VMs, many table updates may hit the database at once.
    hosts_api = hosts.API()

    vir_connection = libvirt.openReadOnly(None)
    try:
        hostname = vir_connection.getHostname()
        # Total CPU MHz and RAM of the local host.
        host_cpu_mhz, host_ram = get_host_characteristics(vir_connection)
        # Number of physical CPUs, obtained through libvirt.
        physical_cpus = physical_cpu_count(vir_connection)
    finally:
        # The connection is only needed for the queries above and is not
        # stored anywhere, so release it here.
        vir_connection.close()

    # Fraction of the host CPU that may be handed out to VMs.
    host_cpu_usable_by_vms = float(CONF.host_cpu_usable_by_vms)
    local_cpu_mhz = int(host_cpu_mhz * host_cpu_usable_by_vms)
    host_cpu_overload_threshold = (
        float(CONF.host_cpu_overload_threshold) * host_cpu_usable_by_vms)
    physical_core_mhz = host_cpu_mhz / physical_cpus

    # Placeholder: per the original note the id is meant to be generated
    # randomly during system initialisation.
    host_id = 2

    init_data = {
        'host_name': hostname,
        'host_id': host_id,
        'local_cpu_mhz': local_cpu_mhz,
        'physical_cpus': physical_cpus,
        'host_ram': host_ram,
        'previous_time': 0.,
        'previous_cpu_time': dict(),
        'previous_cpu_mhz': dict(),
        'previous_host_cpu_time_total': 0.,
        'previous_host_cpu_time_busy': 0.,
        'previous_overload': -1,
        'host_cpu_overload_threshold': host_cpu_overload_threshold,
        'physical_cpu_mhz': host_cpu_mhz,
        'physical_core_mhz': physical_core_mhz
    }

    # NOTE(review): other hosts API calls in this code base take a context
    # as first argument; none is available here -- confirm the signature.
    try:
        hosts_api.create_host_init_data(init_data)
    except exception.HostInitDataNotFound:
        msg = _('host init data not found')
        raise webob.exc.HTTPBadRequest(explanation=msg)
def _host_cpu_overload_process(context, hosts_select_3, vms_cpu_data,
                               hosts_cpu_data):
    """Randomly drop VMs per host until the overload check stops firing.

    Args:
        context: request context handed to the hosts API.
        hosts_select_3: dict mapping a host identifier to the list of VMs
            planned for that host.
        vms_cpu_data: dict indexed by VM, passed to
            ``_vm_mhz_to_percentage``.
        hosts_cpu_data: dict indexed by host identifier, passed to
            ``_vm_mhz_to_percentage``.

    Returns:
        dict mapping each host identifier to the VMs that remain after the
        trimming (possibly an empty list). The input lists are not mutated.
    """
    import importlib

    hosts_api = hosts.API()
    hosts_select_4 = dict()

    overload_algorithm = hosts_api.get_overload_algorithm_in_used(context)
    overload_algorithm_name = overload_algorithm['algorithm_name']
    overload_algorithm_params = overload_algorithm['algorithm_params']
    # The original called the concatenated string directly, which always
    # fails. Assumes '<CONF.overload_algorithm_path>.<name>' is an
    # importable dotted path to the detection callable.
    dotted_path = CONF.overload_algorithm_path + '.' + overload_algorithm_name
    module_path, _sep, attr_name = dotted_path.rpartition('.')
    overload_algorithm_fuction = getattr(
        importlib.import_module(module_path), attr_name)
    overload_algorithm_fuction_params = [overload_algorithm_params]

    usable_fraction = float(CONF.host_cpu_usable_by_vms)

    for host_uuid, vm_list_temp in hosts_select_3.items():
        # Work on a copy: callers may share one list between several hosts.
        remaining_vms = list(vm_list_temp)

        # NOTE(review): libvirt expects a connection URI here; confirm that
        # the host identifier is usable as one.
        vir_connection = libvirt.openReadOnly(host_uuid)
        try:
            physical_cpu_mhz_total = int(
                _physical_cpu_mhz_total(vir_connection) * usable_fraction)
        finally:
            vir_connection.close()

        overload = True
        # Stop as well when no VM is left, instead of calling choice() on
        # an empty list.
        while overload and remaining_vms:
            remaining_vms.remove(choice(remaining_vms))
            # NOTE(review): the check is evaluated per VM and the result of
            # the last VM wins, as in the original; confirm whether the
            # combined load of all remaining VMs was intended.
            for vm in remaining_vms:
                host_cpu_utilization = _vm_mhz_to_percentage(
                    vms_cpu_data[vm],
                    hosts_cpu_data[host_uuid],
                    physical_cpu_mhz_total)
                # Run the configured overload detection algorithm.
                overload, overload_detection_state = \
                    overload_algorithm_fuction(
                        host_cpu_utilization,
                        overload_algorithm_fuction_params)

        hosts_select_4[host_uuid] = remaining_vms

    return hosts_select_4
def _get_hosts_statics(context, hosts_list):
    """Collect CPU data and RAM figures for every host in ``hosts_list``.

    CPU data is read through ``get_host_cpu_data_temp_by_id`` (per the
    original note, backed by the HostCpuDataTemp table); RAM figures come
    from ``get_meminfo_by_id``.

    Returns:
        Tuple ``(cpu_data, total_ram, free_ram)`` of dicts keyed by host.

    Raises:
        webob.exc.HTTPBadRequest: if either lookup reports missing data.
    """
    api = hosts.API()
    cpu_by_host = {}
    total_ram_by_host = {}
    free_ram_by_host = {}

    for host_id in hosts_list:
        try:
            cpu_record = api.get_host_cpu_data_temp_by_id(context, host_id)
        except exception.HostCpuDataNotFound:
            raise webob.exc.HTTPBadRequest(
                explanation=_('host cpu data not found'))
        cpu_by_host[host_id] = cpu_record['cpu_data']

        try:
            meminfo = api.get_meminfo_by_id(context, host_id)
        except exception.HostMemroyInfoNotFound:
            raise webob.exc.HTTPBadRequest(
                explanation=_('host memroy info not found'))
        total_ram_by_host[host_id] = meminfo['MemTotal']
        free_ram_by_host[host_id] = meminfo['MemFree']

    return cpu_by_host, total_ram_by_host, free_ram_by_host
def _compute_host_cpu_mhz(context, host_uuid_temp):
    """Ask the hosts API to compute the CPU MHz of ``host_uuid_temp``.

    The API result is not used and nothing is returned.

    Raises:
        webob.exc.HTTPBadRequest: if the API raises
            ``exception.HostInitDataNotFound``.
    """
    api = hosts.API()
    try:
        api.compute_host_cpu_mhz(context, host_uuid_temp)
    except exception.HostInitDataNotFound:
        raise webob.exc.HTTPBadRequest(
            explanation=_('host init data not found'))
def __init__(self, compute_driver=None, *args, **kwargs):
    """Create the hosts API handle and the RPC API clients, then init the base.

    Args:
        compute_driver: accepted but not used inside this method.
        *args: forwarded to the parent initialiser.
        **kwargs: forwarded to the parent initialiser.
    """
    self.hosts_api = hosts.API()
    # One RPC client per cooperating service.
    self.data_collection_rpcapi = data_collection_rpcapi.DataCollectionRPCAPI()
    self.load_detection_rpcapi = load_detection_rpcapi.LoadDetectionRPCAPI()
    self.vms_selection_rpcapi = vms_selection_rpcapi.VmsSelectionRPCAPI()
    self.vms_migration_rpcapi = vms_migration_rpcapi.VmMigrationRPCAPI()
    self.controller_rpcapi = controller_rpcapi.ControllerRPCAPI()
    # The parent is initialised last, under the fixed service name.
    super(ControllerManager, self).__init__(service_name="xdrs_controller", *args, **kwargs)
def _get_filter_scheduler_algorithms_in_use(context):
    """Return the dotted paths of the filter-scheduler algorithms in use.

    The algorithm names come from the hosts API; each one is prefixed with
    ``CONF.filter_scheduler_algorithm_path`` and a dot.

    Returns:
        list of str, in the order the API returned the names.
    """
    hosts_api = hosts.API()
    algorithm_names = \
        hosts_api.get_filter_scheduler_algorithms_in_used(context)
    # The original used list.add(), which does not exist.
    return [CONF.filter_scheduler_algorithm_path + '.' + algorithm_name
            for algorithm_name in algorithm_names]
def local_vms_select(context):
    """Select VMs to migrate away until the local host is no longer overloaded.

    Repeatedly asks the configured VM selection algorithm for a VM, removes
    it from the local VM collection and re-runs the configured overload
    detection algorithm on the remaining load.

    Returns:
        list of the VM identifiers chosen for migration, in selection order.
    """
    import importlib

    def _load_algorithm(dotted_path):
        # Resolve 'package.module.callable' to the callable. The original
        # called the concatenated string itself, which always fails.
        # Assumes the CONF path plus the algorithm name is importable.
        module_path, _sep, attr_name = dotted_path.rpartition('.')
        return getattr(importlib.import_module(module_path), attr_name)

    hosts_api = hosts.API()
    # NOTE(review): the parameter shadows any module named ``context``, so
    # this calls get_admin_context() on the object passed in -- confirm.
    context = context.get_admin_context()

    # NOTE(review): other collectors pass a path built by
    # _build_local_vm_path()/_build_local_host_path(); here the base
    # directory is used directly, as in the original -- confirm.
    vm_uuids_temp = _get_previous_vms(CONF.local_data_directory)
    host_path = CONF.local_data_directory

    vir_connection = libvirt.openReadOnly(None)
    try:
        vm_ram = _get_ram(vir_connection, vm_uuids_temp)
        # Loop-invariant, so query libvirt once instead of on every pass.
        physical_cpu_mhz_total = int(
            _physical_cpu_mhz_total(vir_connection) *
            float(CONF.host_cpu_usable_by_vms))
    finally:
        vir_connection.close()

    migration_time = _calculate_migration_time(
        vm_ram, float(CONF.network_migration_bandwidth))

    vm_select_algorithm = hosts_api.get_vm_select_algorithm_in_used(context)
    vm_select_algorithm_fuction = _load_algorithm(
        CONF.vm_select_algorithm_path + '.' +
        vm_select_algorithm['algorithm_name'])
    vm_select_algorithm_fuction_params = [
        vm_select_algorithm['algorithm_params'],
        migration_time
    ]

    overload_algorithm = hosts_api.get_overload_algorithm_in_used(context)
    overload_algorithm_fuction = _load_algorithm(
        CONF.overload_algorithm_path + '.' +
        overload_algorithm['algorithm_name'])
    overload_algorithm_fuction_params = [
        overload_algorithm['algorithm_params'],
        migration_time
    ]

    host_load_state_temp = 'overload'
    vm_mrigation_list = list()
    # NOTE(review): this loop compares the detector's first return value
    # with the string 'overload'; other callers treat it as a boolean.
    while host_load_state_temp == 'overload':
        # Per the original note this could also be read from the database.
        host_cpu_mhz = _get_local_host_data(host_path)

        vms_uuid = vm_select_algorithm_fuction(
            vm_select_algorithm_fuction_params)
        # NOTE(review): valid only if vm_uuids_temp is a mapping keyed by
        # VM id; for a list this needs .remove() -- confirm the type.
        del vm_uuids_temp[vms_uuid]

        host_cpu_utilization_temp = _vm_mhz_to_percentage(
            vm_uuids_temp, host_cpu_mhz, physical_cpu_mhz_total)
        host_load_state_temp, overload_detection_state = \
            overload_algorithm_fuction(
                host_cpu_utilization_temp,
                overload_algorithm_fuction_params)

        # The original used list.add(), which does not exist.
        vm_mrigation_list.append(vms_uuid)

    return vm_mrigation_list
def _get_all_available_hosts(context):
    """Return the hosts that are normally loaded and known to nova.

    A host qualifies when its load state reported by the hosts API is
    ``'normalload'`` and its identifier is found in the ``'id'`` entry of
    the nova host index.

    Returns:
        list of host identifiers, in the order the load states were
        returned.
    """
    hosts_api = hosts.API()
    # Named ``nova_hosts`` rather than ``hosts``: a local called ``hosts``
    # shadows the module and makes ``hosts.API()`` above raise
    # UnboundLocalError.
    nova_hosts = novaclient(context).hosts.index()
    hosts_states = hosts_api.get_all_hosts_load_states_sorted_list(context)

    # NOTE(review): assumes the index result supports ['id'] -- confirm
    # against the nova client in use.
    # Built as a new list; the original deleted by value while iterating.
    available_hosts = [
        uuid for uuid, host_load_state in hosts_states
        if host_load_state == 'normalload' and uuid in nova_hosts['id']
    ]
    return available_hosts
def compute_host_cpu_mhz(self, context, host_uuid_temp):
    """Run the host CPU MHz computation from the host's temporary init data.

    Loads the temporary init record of ``host_uuid_temp`` and feeds its
    physical CPU MHz and previous total/busy CPU times to
    ``self._get_host_cpu_mhz``. The computed values are not returned.

    Raises:
        webob.exc.HTTPBadRequest: if the temporary init data is missing.
    """
    api = hosts.API()
    try:
        temp_record = api.get_host_init_data_temp(context, host_uuid_temp)
    except exception.HostInitDataNotFound:
        raise webob.exc.HTTPBadRequest(
            explanation=_('host init data not found'))

    cpu_mhz = temp_record['physical_cpu_mhz']
    prev_total = temp_record['previous_host_cpu_time_total']
    prev_busy = temp_record['previous_host_cpu_time_busy']

    # Unpacked as in the original, so a result of the wrong shape still
    # fails here.
    time_total, time_busy, mhz_used = self._get_host_cpu_mhz(
        cpu_mhz, prev_total, prev_busy)
def _fetch_remote_data(data_length, uuids):
    """Fetch the latest CPU samples of the given VMs from the central database.

    Args:
        data_length: maximum number of most recent samples to keep per VM.
        uuids: iterable of VM identifiers.

    Returns:
        dict mapping each VM identifier to the last ``data_length`` entries
        of its stored ``'cpu_data'`` (fewer if less is stored).

    Raises:
        webob.exc.HTTPBadRequest: if no CPU data exists for a VM.
    """
    hosts_api = hosts.API()
    vm_cpu_data = dict()

    for vm_uuid in uuids:
        # NOTE(review): other callers pass a context as first argument to
        # this API method; none is available here -- confirm.
        try:
            vm_data = hosts_api.get_vm_cpu_data_by_vm_id(vm_uuid)
        except exception.VmCpuDataNotFound:
            msg = _('vm cpu data not found')
            raise webob.exc.HTTPBadRequest(explanation=msg)

        vm_cpu_data_all = vm_data['cpu_data']
        # The original overwrote data_length with a non-existent ``.length``
        # attribute and always sliced a fixed window of 10.
        if data_length > 0:
            vm_cpu_data[vm_uuid] = vm_cpu_data_all[-data_length:]
        else:
            vm_cpu_data[vm_uuid] = vm_cpu_data_all[:0]

    return vm_cpu_data
def _get_vms_statics(context, vms_list, host_uuid):
    """Collect CPU and RAM data for the VMs that are to be migrated.

    Returns:
        Tuple ``(vms_cpu_data, vms_ram_data)``: a dict of per-VM
        ``'cpu_data'`` and whatever ``get_vms_ram_on_specific`` returns for
        ``vms_list`` on ``host_uuid``.

    Raises:
        webob.exc.HTTPNotFound: if the CPU data of a VM is missing.
        webob.exc.HTTPBadRequest: if the RAM data is missing.
    """
    api = hosts.API()

    cpu_by_vm = {}
    for vm_id in vms_list:
        try:
            cpu_record = api.get_vm_cpu_data_by_vm_id(context, vm_id)
        except exception.NotFound:
            raise webob.exc.HTTPNotFound()
        cpu_by_vm[vm_id] = cpu_record['cpu_data']

    # One query covers the RAM of the whole VM list.
    try:
        ram_by_vm = api.get_vms_ram_on_specific(context, vms_list, host_uuid)
    except exception.VmsOnHostRamNotFoune:
        raise webob.exc.HTTPBadRequest(
            explanation=_('vms on specific host ram data not found.'))

    return cpu_by_vm, ram_by_vm
def local_load_detect(context):
    """Classify the local host as underloaded, overloaded or normally loaded.

    Reads the locally stored VM and host CPU samples, converts them to a
    host CPU utilisation, runs the configured underload and overload
    detection algorithms and stores the resulting load state through the
    hosts API.

    Returns:
        ``False`` when no VM samples or no utilisation data are available,
        ``0`` once the load state has been stored.

    Raises:
        webob.exc.HTTPNotFound: if the load state record is missing.
    """
    import importlib

    def _load_algorithm(dotted_path):
        # Resolve 'package.module.callable' to the callable. The original
        # called the concatenated string itself, which always fails.
        # Assumes the CONF path plus the algorithm name is importable.
        module_path, _sep, attr_name = dotted_path.rpartition('.')
        return getattr(importlib.import_module(module_path), attr_name)

    hosts_api = hosts.API()
    # NOTE(review): the parameter shadows any module named ``context``, so
    # this calls get_admin_context() on the object passed in -- confirm.
    context = context.get_admin_context()

    # 1-2. Locate and read the locally stored per-VM samples.
    vm_path = _build_local_vm_path(CONF.local_data_directory)
    vm_cpu_mhz = _get_local_vm_data(vm_path)

    vir_connection = libvirt.openReadOnly(None)
    try:
        # 3. Maximum RAM of every VM identified by UUID.
        vm_ram = _get_ram(vir_connection, vm_cpu_mhz.keys())

        # 4. Drop the records of VMs that are not in the UUID list.
        vm_cpu_mhz = _cleanup_vm_data(vm_cpu_mhz, vm_ram.keys())

        # 5. No VM data means the host is idle (no VM is running on it).
        if not vm_cpu_mhz:
            return False

        # 6-7. Locate and read the locally stored host samples.
        host_path = _build_local_host_path(CONF.local_data_directory)
        host_cpu_mhz = _get_local_host_data(host_path)

        # 8. CPU MHz reported by libvirt, scaled by the fraction of the
        #    host CPU that VMs are allowed to use.
        physical_cpu_mhz_total = int(
            _physical_cpu_mhz_total(vir_connection) *
            float(CONF.host_cpu_usable_by_vms))
    finally:
        vir_connection.close()

    host_cpu_utilization = _vm_mhz_to_percentage(
        vm_cpu_mhz.values(), host_cpu_mhz, physical_cpu_mhz_total)
    if not host_cpu_utilization:
        return False

    # 9. Migration time derived from VM RAM and the network bandwidth
    #    allowed for migration.
    migration_time = _calculate_migration_time(
        vm_ram, float(CONF.network_migration_bandwidth))

    # 10. Underload detection algorithm: name and parameters come from the
    #     database.
    underload_algorithm = hosts_api.get_underload_algorithm_in_used(context)
    underload_algorithm_fuction = _load_algorithm(
        CONF.underload_algorithm_path + '.' +
        underload_algorithm['algorithm_name'])
    underload_algorithm_fuction_params = [
        underload_algorithm['algorithm_params'], migration_time]

    # 11. Overload detection algorithm, resolved the same way; parameters
    #     are parsed inside the algorithm itself.
    overload_algorithm = hosts_api.get_overload_algorithm_in_used(context)
    overload_algorithm_fuction = _load_algorithm(
        CONF.overload_algorithm_path + '.' +
        overload_algorithm['algorithm_name'])
    overload_algorithm_fuction_params = [
        overload_algorithm['algorithm_params'], migration_time]

    # 13. Run the underload detection.
    underload, underload_detection_state = underload_algorithm_fuction(
        host_cpu_utilization, underload_algorithm_fuction_params)

    # 14. Run the overload detection.
    overload, overload_detection_state = overload_algorithm_fuction(
        host_cpu_utilization, overload_algorithm_fuction_params)

    # Placeholder: per the original note the local host uuid is to be read
    # from HostInitData.
    host_id = ''

    # Overload takes precedence, as in the original. The original chain
    # also replaced 'underload' by 'normalload' whenever the host was not
    # overloaded.
    if overload:
        host_load_state = 'overload'
    elif underload:
        host_load_state = 'underload'
    else:
        host_load_state = 'normalload'

    # Store the load state in the HostLoadState table.
    try:
        hosts_api.update_host_load_states(context, host_id, host_load_state)
    except exception.HostLoadStateNotFound as ex:
        raise webob.exc.HTTPNotFound(explanation=ex.format_message())

    return 0
def _multiple_hosts_select(
        context,
        vms_list,
        local_host_uuid,
        hosts_list,
        vms_cpu_data,
        vms_ram_data,
        hosts_cpu_data,
        hosts_total_ram,
        hosts_free_ram,
        vms_ram_total):
    """Pick one target host for a group of VMs, then recurse for the rest.

    Args:
        context: request context.
        vms_list: VMs to migrate.
        local_host_uuid: uuid of the local host.
        hosts_list: candidate target hosts.
        vms_cpu_data: CPU data of the VMs, indexed by VM.
        vms_ram_data: RAM data of the VMs, indexed by VM.
        hosts_cpu_data: CPU data of the candidate hosts, indexed by host.
        hosts_total_ram: total RAM of the candidate hosts, indexed by host.
        hosts_free_ram: free RAM of the candidate hosts, indexed by host.
        vms_ram_total: summed RAM of the VMs to migrate.

    Returns:
        ``False`` when no candidate host could be formed; otherwise a tuple
        ``(hosts_select_finally, vm_noselect_list_finally)`` where the
        first item maps host -> list of VMs placed on it and the second is
        the list of VMs for which no host was found.

    Raises:
        webob.exc.HTTPBadRequest: if host init data or host CPU data is
            missing.
    """
    import importlib

    def _load_algorithm(dotted_path):
        # Resolve 'package.module.callable' to the callable. The original
        # called the concatenated string itself, which always fails.
        # Assumes the CONF path plus the algorithm name is importable.
        module_path, _sep, attr_name = dotted_path.rpartition('.')
        return getattr(importlib.import_module(module_path), attr_name)

    def _utilization_of(entry):
        # entry is a (host, (utilization, vms)) item of a prediction dict.
        return entry[1][0]

    hosts_api = hosts.API()
    # Copies, so later filtering cannot alter the caller's lists.
    vms_list_global = list(vms_list)
    host_noselect_list_global = list(hosts_list)

    # 1. Copy the relevant init data of every candidate host into the
    #    temporary init-data store.
    # NOTE(review): recursive rounds repeat this for the remaining hosts;
    # confirm the create call tolerates existing rows.
    for host_uuid in hosts_list:
        try:
            host_init_data = hosts_api.get_host_init_data(context, host_uuid)
        except exception.HostInitDataNotFound:
            msg = _('host init data not found')
            raise webob.exc.HTTPBadRequest(explanation=msg)

        host_init_data_create = {
            'previous_host_cpu_time_total':
                host_init_data['previous_host_cpu_time_total'],
            'previous_host_cpu_time_busy':
                host_init_data['previous_host_cpu_time_busy'],
            'physical_cpu_mhz': host_init_data['physical_cpu_mhz'],
            'host_uuid': host_init_data['host_uuid'],
        }

        try:
            hosts_api.create_host_init_data_temp(context,
                                                 host_init_data_create)
        except exception.HostInitDataNotFound:
            msg = _('host init data not found')
            raise webob.exc.HTTPBadRequest(explanation=msg)

    # 2. Keep the hosts whose free RAM exceeds the total RAM of the VMs.
    #    Built as a new list; the original deleted by value while iterating.
    ram_distance = dict()
    hosts_with_room = []
    for host_uuid in hosts_list:
        ram_distance[host_uuid] = hosts_free_ram[host_uuid] - vms_ram_total
        if hosts_free_ram[host_uuid] > vms_ram_total:
            hosts_with_room.append(host_uuid)

    # 3. Every host with enough RAM is a candidate for the whole VM list.
    hosts_select_0 = dict()
    for host_uuid in hosts_with_room:
        hosts_select_0[host_uuid] = list(vms_list)

    # 4. No host has enough RAM: take the host closest to the requirement
    #    and let the helper drop VMs until the rest fits.
    hosts_select_1 = dict()
    if not hosts_select_0 and ram_distance:
        vms_select_temp, min_ram_distance_host = \
            _host_ram_not_enough_process(
                context, ram_distance, vms_list, vms_ram_data)
        hosts_select_1 = {min_ram_distance_host: vms_select_temp}

    # 5. Neither path produced a candidate: the selection failed.
    if not hosts_select_0 and not hosts_select_1:
        return False

    # 6. Exactly one of the two candidate sets is non-empty.
    hosts_select_2 = hosts_select_0 or hosts_select_1

    # 7. Predict the load state of each candidate after the migration.
    overload_algorithm = hosts_api.get_overload_algorithm_in_used(context)
    overload_algorithm_fuction = _load_algorithm(
        CONF.overload_algorithm_path + '.' +
        overload_algorithm['algorithm_name'])
    overload_algorithm_fuction_params = [
        overload_algorithm['algorithm_params']]

    underload_algorithm = hosts_api.get_underload_algorithm_in_used(context)
    underload_algorithm_fuction = _load_algorithm(
        CONF.underload_algorithm_path + '.' +
        underload_algorithm['algorithm_name'])
    underload_algorithm_fuction_params = [
        underload_algorithm['algorithm_params']]

    host_cpu_predict_underload = dict()
    host_cpu_predict_overload = dict()
    host_cpu_predict_normalload = dict()

    for host_uuid_temp, vms_list_temp in hosts_select_2.items():
        if not vms_list_temp:
            # Nothing would be placed here, so nothing can be predicted.
            continue

        # NOTE(review): libvirt expects a connection URI; confirm that the
        # host identifier is usable as one.
        vir_connection = libvirt.openReadOnly(host_uuid_temp)
        try:
            physical_cpu_mhz_total = int(
                _physical_cpu_mhz_total(vir_connection) *
                float(CONF.host_cpu_usable_by_vms))
        finally:
            vir_connection.close()

        # NOTE(review): evaluated per VM, the last VM's result wins, as in
        # the original; confirm whether the combined load was intended.
        for vm in vms_list_temp:
            host_cpu_utilization = _vm_mhz_to_percentage(
                vms_cpu_data[vm],
                hosts_cpu_data[host_uuid_temp],
                physical_cpu_mhz_total)
            underload, underload_detection_state = \
                underload_algorithm_fuction(
                    host_cpu_utilization,
                    underload_algorithm_fuction_params)
            overload, overload_detection_state = \
                overload_algorithm_fuction(
                    host_cpu_utilization,
                    overload_algorithm_fuction_params)

        # 8. Sort the host into one bucket; overload takes precedence, as
        #    in the original.
        prediction = (host_cpu_utilization, vms_list_temp)
        if overload:
            host_cpu_predict_overload[host_uuid_temp] = prediction
        elif underload:
            host_cpu_predict_underload[host_uuid_temp] = prediction
        else:
            host_cpu_predict_normalload[host_uuid_temp] = prediction

    # hosts_select_finally maps host -> VM list. The original stored the
    # pair reversed, using the list as a dict key.
    hosts_select_finally = dict()
    hosts_select_3 = dict()

    if host_cpu_predict_normalload:
        # 9. Among normally loaded hosts take the lowest utilisation.
        host_uuid, values = min(host_cpu_predict_normalload.items(),
                                key=_utilization_of)
        hosts_select_finally[host_uuid] = values[1]
    elif host_cpu_predict_underload:
        # 11. No normal host: take the underloaded host with the highest
        #     utilisation. Applied even when no host is overloaded;
        #     otherwise nothing would be selected in that case.
        host_uuid, values = max(host_cpu_predict_underload.items(),
                                key=_utilization_of)
        hosts_select_finally[host_uuid] = values[1]
    elif host_cpu_predict_overload:
        # 10. Every host would be overloaded: take the least utilised one
        #     and trim its VM list in step 12.
        host_uuid, values = min(host_cpu_predict_overload.items(),
                                key=_utilization_of)
        hosts_select_3[host_uuid] = values[1]

    # 12. Drop VMs from the overloaded candidate until it is acceptable.
    hosts_select_4 = _host_cpu_overload_process(
        context, hosts_select_3, vms_cpu_data, hosts_cpu_data)

    # 13. Merge the trimmed candidate in; the original replaced the result
    #     of steps 9 and 11 here.
    hosts_select_finally.update(hosts_select_4)

    # 14. Book the RAM of the placed VMs on their hosts and work out which
    #     VMs and hosts are still open.
    placed_vms = []
    for host_uuid, vm_list_temp in hosts_select_finally.items():
        vms_ram_total_temp = 0
        for vm in vm_list_temp:
            vms_ram_total_temp = vms_ram_total_temp + vms_ram_data[vm]

        try:
            host_cpu_data_temp = hosts_api.get_host_cpu_data_temp_by_id(
                context, host_uuid)
        except exception.HostCpuDataNotFound:
            msg = _('host cpu data not found')
            raise webob.exc.HTTPBadRequest(explanation=msg)

        host_cpu_data_temp['hosts_free_ram'] = \
            host_cpu_data_temp['hosts_free_ram'] - vms_ram_total_temp

        try:
            hosts_api.update_host_cpu_data_temp_by_id(
                context, host_cpu_data_temp, host_uuid)
        except exception.HostCpuDataNotFound:
            msg = _('host cpu data not found')
            raise webob.exc.HTTPBadRequest(explanation=msg)

        placed_vms.extend(vm_list_temp)

    vm_noselect_list = [vm for vm in vms_list_global
                        if vm not in placed_vms]
    host_noselect_list = [host for host in host_noselect_list_global
                          if host not in hosts_select_finally]

    # 15. Run another round for the VMs that are still open. A round is
    #     only started when this one selected a host, so the candidate
    #     list shrinks every time and the recursion terminates.
    if vm_noselect_list and host_noselect_list and hosts_select_finally:
        hosts_cpu_data, hosts_total_ram, hosts_free_ram = \
            _get_hosts_statics(context, host_noselect_list)
        vms_cpu_data, vms_ram_data = _get_vms_statics(
            context, vm_noselect_list, local_host_uuid)
        vms_ram_total = 0
        for vm in vm_noselect_list:
            vms_ram_total = vms_ram_total + vms_ram_data[vm]

        next_round = _multiple_hosts_select(
            context,
            vm_noselect_list,
            local_host_uuid,
            host_noselect_list,
            vms_cpu_data,
            vms_ram_data,
            hosts_cpu_data,
            hosts_total_ram,
            hosts_free_ram,
            vms_ram_total)
        # The original discarded this result.
        if next_round is not False:
            hosts_select_finally.update(next_round[0])
            vm_noselect_list = next_round[1]

    # 16-17. Final host -> VMs mapping and the VMs left without a host.
    vm_noselect_list_finally = vm_noselect_list
    return hosts_select_finally, vm_noselect_list_finally
def __init__(self, **kwargs):
    """Initialise the parent controller and attach a hosts API handle.

    Args:
        **kwargs: forwarded unchanged to the parent initialiser.
    """
    super(VmSelectAlgorithmsController, self).__init__(**kwargs)
    self.hosts_api = hosts.API()
def __init__(self, **kwargs):
    """Initialise the parent controller and attach a hosts API handle.

    Args:
        **kwargs: forwarded unchanged to the parent initialiser.
    """
    super(HostSchedulerAlgorithmsController, self).__init__(**kwargs)
    self.hosts_api = hosts.API()
def single_host_select(context, vms_list, host_uuid, hosts_list):
    """Choose a migration target host for each VM, one VM at a time.

    Placing VMs one by one favours an even load across the cluster; this
    is the counterpart of the multi-host selection.

    Args:
        context: request context.
        vms_list: all VMs that are to be migrated.
        host_uuid: uuid of the local host.
        hosts_list: pre-filtered candidate hosts.

    Returns:
        Tuple ``(placed, unplaced)``: ``placed`` maps VM -> chosen host,
        ``unplaced`` maps every VM without a suitable host to ``0``.

    Raises:
        webob.exc.HTTPNotFound: if the CPU data of a VM is missing.
        webob.exc.HTTPBadRequest: if RAM, host CPU or host memory data is
            missing.
    """
    import importlib

    hosts_api = hosts.API()

    # 1. CPU data of every VM, then the RAM data of the whole VM list.
    vms_cpu_data = dict()
    for vm in vms_list:
        try:
            cpu_data = hosts_api.get_vm_cpu_data_by_vm_id(context, vm)
        except exception.NotFound:
            raise webob.exc.HTTPNotFound()
        vms_cpu_data[vm] = cpu_data['cpu_data']

    vms_ram_data = dict()
    if vms_list:
        try:
            vms_ram_data = hosts_api.get_vms_ram_on_specific(
                context, vms_list, host_uuid)
        except exception.VmsOnHostRamNotFoune:
            msg = _('vms on specific host ram data not found.')
            raise webob.exc.HTTPBadRequest(explanation=msg)

    # 2. CPU data and RAM figures of every candidate host.
    hosts_cpu_data = dict()
    hosts_total_ram = dict()
    hosts_free_ram = dict()
    for host in hosts_list:
        try:
            cpu_data = hosts_api.get_host_cpu_data_by_id(context, host)
        except exception.HostCpuDataNotFound:
            msg = _('host cpu data not found')
            raise webob.exc.HTTPBadRequest(explanation=msg)
        hosts_cpu_data[host] = cpu_data['cpu_data']

        try:
            host_meminfo = hosts_api.get_meminfo_by_id(context, host)
        except exception.HostMemroyInfoNotFound:
            msg = _('host memroy info not found')
            raise webob.exc.HTTPBadRequest(explanation=msg)
        hosts_total_ram[host] = host_meminfo['MemTotal']
        hosts_free_ram[host] = host_meminfo['MemFree']

    # 3. Resolve the overload detection algorithm. The original called the
    #    concatenated string itself, which always fails. Assumes the CONF
    #    path plus the algorithm name is an importable dotted path.
    overload_algorithm = hosts_api.get_overload_algorithm_in_used(context)
    dotted_path = (CONF.overload_algorithm_path + '.' +
                   overload_algorithm['algorithm_name'])
    module_path, _sep, attr_name = dotted_path.rpartition('.')
    overload_algorithm_fuction = getattr(
        importlib.import_module(module_path), attr_name)
    overload_algorithm_fuction_params = [
        overload_algorithm['algorithm_params']]

    # The usable CPU MHz of a host does not depend on the VM, so each host
    # is queried (and its connection closed) at most once.
    physical_mhz_by_host = dict()
    vms_hosts_mapper = dict()

    for vm in vms_list:
        # Candidates are rebuilt per VM; the original kept stale entries
        # from earlier VMs.
        vm_hosts_cpu = dict()
        for host in hosts_list:
            if hosts_free_ram[host] <= vms_ram_data[vm]:
                continue

            if host not in physical_mhz_by_host:
                # NOTE(review): libvirt expects a connection URI; confirm
                # that the host identifier is usable as one.
                vir_connection = libvirt.openReadOnly(host)
                try:
                    physical_mhz_by_host[host] = int(
                        _physical_cpu_mhz_total(vir_connection) *
                        float(CONF.host_cpu_usable_by_vms))
                finally:
                    vir_connection.close()

            host_cpu_utilization = _vm_mhz_to_percentage(
                vms_cpu_data[vm],
                hosts_cpu_data[host],
                physical_mhz_by_host[host])
            overload, overload_detection_state = overload_algorithm_fuction(
                host_cpu_utilization, overload_algorithm_fuction_params)
            vm_hosts_cpu[host] = (host_cpu_utilization, overload)

        # Among the hosts that stay in a normal state pick the one with
        # the lowest predicted utilisation; 0 means "no host found".
        target_host = 0
        lowest_utilization = 100.0
        for candidate, (utilization, overload) in vm_hosts_cpu.items():
            # Accept both detector contracts seen in this code base: a
            # boolean overload flag or a load-state string.
            is_normal = overload is False or overload == 'normalload'
            if is_normal and utilization < lowest_utilization:
                target_host = candidate
                lowest_utilization = utilization

        vms_hosts_mapper[vm] = target_host
        if target_host == 0:
            continue

        # Reserve the VM's RAM on the chosen host and persist the figure.
        hosts_free_ram[target_host] = \
            hosts_free_ram[target_host] - vms_ram_data[vm]
        try:
            hosts_api.update_host_cpu_data_temp_by_id(
                context, hosts_free_ram[target_host], target_host)
        except exception.HostCpuDataNotFound:
            msg = _('host cpu data not found')
            raise webob.exc.HTTPBadRequest(explanation=msg)

    # 4-5. Split the mapping into placed and unplaced VMs. Two new dicts
    #      are built; the original aliased one dict and deleted from it
    #      while iterating.
    vms_hosts_mapper_success = dict()
    vms_hosts_mapper_fales = dict()
    for vm, target_host in vms_hosts_mapper.items():
        if target_host == 0:
            vms_hosts_mapper_fales[vm] = target_host
        else:
            vms_hosts_mapper_success[vm] = target_host

    return vms_hosts_mapper_success, vms_hosts_mapper_fales
def __init__(self, **kwargs):
    """Initialise the parent controller and attach a hosts API handle.

    Args:
        **kwargs: forwarded unchanged to the parent initialiser.
    """
    super(VMsCpuDataController, self).__init__(**kwargs)
    self.hosts_api = hosts.API()
def __init__(self, **kwargs):
    """Initialise the parent controller and attach a hosts API handle.

    Args:
        **kwargs: forwarded unchanged to the parent initialiser.
    """
    super(UnderloadAlgorithmsController, self).__init__(**kwargs)
    self.hosts_api = hosts.API()
def __init__(self, **kwargs):
    """Initialise the parent controller and attach a hosts API handle.

    Args:
        **kwargs: forwarded unchanged to the parent initialiser.
    """
    super(HostTaskStateController, self).__init__(**kwargs)
    self.hosts_api = hosts.API()
def __init__(self, ext_mgr=None, **kwargs):
    """Initialise the parent controller and attach a hosts API handle.

    Args:
        ext_mgr: accepted but not used inside this method.
        **kwargs: forwarded unchanged to the parent initialiser.
    """
    super(Controller, self).__init__(**kwargs)
    self.hosts_api = hosts.API()
def local_data_collector(context):
    """Run one collection pass for the local host and its VMs.

    Synchronises the locally stored VM files with the VMs currently known
    to libvirt, measures VM and host CPU usage since the previous pass,
    stores the samples locally and remotely (except on the first pass) and
    writes the updated bookkeeping back through the hosts API.

    Returns:
        True once the pass has completed.

    Raises:
        webob.exc.HTTPBadRequest: if the host init data is missing.
    """
    # Placeholder: per the original note the host uuid is to be looked up
    # by host name; it is generated during component initialisation.
    host_id = 2

    hosts_api = hosts.API()
    try:
        # The context is passed first, matching the other call of this API
        # method in the code base; the original omitted it.
        init_data = hosts_api.get_host_init_data(context, host_id)
    except exception.HostInitDataNotFound:
        msg = _('host init data not found')
        raise webob.exc.HTTPBadRequest(explanation=msg)

    # 1. Local storage paths and the number of recent samples kept locally
    #    (original example: local_data_directory="/var/lib/xdrs",
    #    data_collector_data_length=100).
    vm_path = _build_local_vm_path(CONF.local_data_directory)
    host_path = _build_local_host_path(CONF.local_data_directory)
    data_length = int(CONF.data_collector_data_length)

    # 2. VM UUIDs found under the local VM path.
    vms_previous = _get_previous_vms(vm_path)

    vir_connection = libvirt.openReadOnly(None)
    try:
        # 3. VMs currently reported by libvirt.
        vms_current = _get_current_vms(vir_connection)

        # 4. VMs that appeared since the previous pass.
        vms_added = _get_added_vms(vms_previous, vms_current.keys())
        added_vm_data = dict()

        # 5. Added VMs may have been migrated here, so their earlier
        #    samples are fetched from the central database and stored
        #    locally.
        if vms_added:
            # Ignore added VMs that are not running. A new list is built;
            # the original deleted from the list while enumerating it,
            # which skips the element after each deletion.
            running_added = []
            for vm in vms_added:
                if vms_current[vm] != libvirt.VIR_DOMAIN_RUNNING:
                    del vms_current[vm]
                else:
                    running_added.append(vm)
            vms_added = running_added

            _create_new_vms_files(vms_added, vm_path)
            added_vm_data = _fetch_remote_data(data_length, vms_added)
            _write_vm_data_locally(vm_path, added_vm_data, data_length)

        # 6. VMs that disappeared since the previous pass.
        vms_removed = _get_removed_vms(vms_previous, vms_current.keys())

        # 7. Remove their local data and their bookkeeping entries.
        #    Original note: the matching init_data rows in the database
        #    should arguably be removed as well.
        if vms_removed:
            _cleanup_local_vm_data(vm_path, vms_removed)
            for vm in vms_removed:
                # pop(): a removed VM may never have been measured.
                init_data['previous_cpu_time'].pop(vm, None)
                init_data['previous_cpu_mhz'].pop(vm, None)

        # 8. Start of the actual measurement.
        current_time = time.time()

        # 9. Per-VM CPU usage in MHz, based on the per-core MHz, the
        #    previous CPU times and timestamp, the current timestamp and
        #    VM list, the previous per-VM MHz values and the samples
        #    fetched for added VMs.
        (cpu_time, cpu_mhz) = _get_cpu_mhz(
            vir_connection,
            init_data['physical_core_mhz'],
            init_data['previous_cpu_time'],
            init_data['previous_time'],
            current_time,
            vms_current.keys(),
            init_data['previous_cpu_mhz'],
            added_vm_data)
    finally:
        vir_connection.close()

    # 12. Host CPU usage in MHz together with the current total and busy
    #     CPU times.
    (host_cpu_time_total,
     host_cpu_time_busy,
     host_cpu_mhz) = _get_host_cpu_mhz(
        init_data['physical_cpu_mhz'],
        init_data['previous_host_cpu_time_total'],
        init_data['previous_host_cpu_time_busy'])

    # 13. Store and submit the samples. Skipped on the first pass, when no
    #     previous timestamp exists.
    if init_data['previous_time'] > 0:
        _append_vm_data_locally(vm_path, cpu_mhz, data_length)
        _append_vm_data_remotely(cpu_mhz)

        total_vms_cpu_mhz = sum(cpu_mhz.values())
        # Hypervisor share = host usage minus the VMs' usage, floored at 0.
        host_cpu_mhz_hypervisor = host_cpu_mhz - total_vms_cpu_mhz
        if host_cpu_mhz_hypervisor < 0:
            host_cpu_mhz_hypervisor = 0
        total_cpu_mhz = total_vms_cpu_mhz + host_cpu_mhz_hypervisor

        _append_host_data_locally(host_path, host_cpu_mhz_hypervisor,
                                  data_length)
        # NOTE(review): this reads the key 'hostname'; confirm it matches
        # the key under which the init data was stored.
        _append_host_data_remotely(init_data['hostname'],
                                   host_cpu_mhz_hypervisor)

        # Record whether the host is overloaded right now; per the
        # original note the state should later be written to its table.
        init_data['previous_overload'] = _log_host_overload(
            init_data['host_cpu_overload_threshold'],
            init_data['hostname'],
            init_data['previous_overload'],
            init_data['physical_cpu_mhz'],
            total_cpu_mhz)

    # 14. Update the bookkeeping for the next pass.
    init_data['previous_time'] = current_time
    init_data['previous_cpu_time'] = cpu_time
    init_data['previous_cpu_mhz'] = cpu_mhz
    init_data['previous_host_cpu_time_total'] = host_cpu_time_total
    init_data['previous_host_cpu_time_busy'] = host_cpu_time_busy

    # NOTE(review): called without a context, as in the original; confirm
    # the signature of update_host_init_data.
    try:
        hosts_api.update_host_init_data(init_data, host_id)
    except exception.HostInitDataNotFound:
        msg = _('host init data not found')
        raise webob.exc.HTTPBadRequest(explanation=msg)

    # 15. One collection pass for the local VMs and host is complete.
    return True