def instruction_process_engine(self):
    """Consume instructions from the redis instruction channel and execute them.

    The method subscribes to ``config['instruction_channel']`` and loops until
    ``Utils.exit_flag`` becomes true.  On every cycle it refreshes this
    thread's heartbeat in ``threads_status``, polls the subscription for one
    message and, when the message is a JSON object addressed to this node,
    dispatches on ``msg['_object']`` / ``msg['action']``.

    Actions handled inline end with ``response_emit.success``.  Long running
    actions are handed to a non-daemon worker thread and the loop moves on
    without emitting a response itself.  Any exception raised while handling
    an instruction is logged and answered with ``response_emit.failure``.

    Messages that are not valid JSON objects, or that lack ``_object`` or
    ``action``, are ignored.
    """
    self.init_conn()

    ps = r.pubsub(ignore_subscribe_messages=False)
    ps.subscribe(config['instruction_channel'])

    while True:
        if Utils.exit_flag:
            msg = 'Thread instruction_process_engine say bye-bye'
            print(msg)
            logger.info(msg=msg)
            return

        # Heartbeat of this thread.
        threads_status['instruction_process_engine'] = {'timestamp': ji.Common.ts()}

        # Reset on every cycle so the failure handler below never reports a
        # message left over from a previous iteration (or an unbound name).
        msg = None

        # noinspection PyBroadException
        try:
            raw = ps.get_message(timeout=config['engine_cycle_interval'])

            if raw is None or 'data' not in raw or not isinstance(raw['data'], basestring):
                continue

            try:
                msg = json.loads(raw['data'])
            except ValueError as e:
                logger.error(e.message)
                log_emit.error(e.message)
                continue

            # Valid JSON that is not an object cannot be an instruction.
            if not isinstance(msg, dict):
                continue

            if msg.get('action') == 'pong':
                continue

            if msg.get('action') == 'ping':
                # Use ping/pong to keep the subscription alive.  Testing showed
                # that a subscriber is dropped automatically when the channel
                # carries no traffic for a long time.
                r.publish(config['instruction_channel'], message=json.dumps({'action': 'pong'}))
                continue

            if 'node_id' in msg and int(msg['node_id']) != self.node_id:
                continue

            # Compact form of the verbose
            # `if '_object' not in msg or 'action' not in msg:` check.
            if not all(key in msg for key in ('_object', 'action')):
                continue

            extend_data = dict()

            if msg['_object'] == 'guest':

                self.refresh_guest_mapping()
                if msg['action'] not in ['create']:

                    if msg['uuid'] not in self.guest_mapping_by_uuid:

                        if config['DEBUG']:
                            _log = u' '.join([u'uuid', msg['uuid'], u'在计算节点', self.hostname, u'中未找到.'])
                            logger.debug(_log)
                            log_emit.debug(_log)

                        raise RuntimeError('The uuid ' + msg['uuid'] + ' not found in current domains list.')

                    self.guest = self.guest_mapping_by_uuid[msg['uuid']]
                    if not isinstance(self.guest, libvirt.virDomain):
                        raise RuntimeError('Guest ' + msg['uuid'] + ' is not a domain.')

                if msg['action'] == 'create':
                    t = threading.Thread(target=Guest.create, args=(self.conn, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'reboot':
                    if self.guest.reboot() != 0:
                        raise RuntimeError('Guest reboot failure.')

                elif msg['action'] == 'force_reboot':
                    self.guest.destroy()
                    self.guest.create()
                    Guest.quota(guest=self.guest, msg=msg)

                elif msg['action'] == 'shutdown':
                    if self.guest.shutdown() != 0:
                        raise RuntimeError('Guest shutdown failure.')

                elif msg['action'] == 'force_shutdown':
                    if self.guest.destroy() != 0:
                        raise RuntimeError('Guest force shutdown failure.')

                elif msg['action'] == 'boot':
                    if not self.guest.isActive():

                        if self.guest.create() != 0:
                            raise RuntimeError('Guest boot failure.')

                        Guest.quota(guest=self.guest, msg=msg)

                elif msg['action'] == 'suspend':
                    if self.guest.suspend() != 0:
                        raise RuntimeError('Guest suspend failure.')

                elif msg['action'] == 'resume':
                    if self.guest.resume() != 0:
                        raise RuntimeError('Guest resume failure.')

                elif msg['action'] == 'delete':
                    # Read the domain XML before the domain is undefined.
                    root = ET.fromstring(self.guest.XMLDesc())

                    if self.guest.isActive():
                        self.guest.destroy()

                    self.guest.undefine()

                    # The disk whose target device is 'vda' is treated as the system disk.
                    system_disk = None

                    for _disk in root.findall('devices/disk'):
                        if 'vda' == _disk.find('target').get('dev'):
                            system_disk = _disk

                    if msg['storage_mode'] in [StorageMode.ceph.value, StorageMode.glusterfs.value]:
                        # Extract the system image path.
                        path_list = system_disk.find('source').attrib['name'].split('/')

                        if msg['storage_mode'] == StorageMode.glusterfs.value:
                            Guest.dfs_volume = path_list[0]
                            Guest.init_gfapi()

                            try:
                                Guest.gf.remove('/'.join(path_list[1:]))
                            except OSError:
                                # Best effort: a failed image removal does not fail the delete.
                                pass

                    elif msg['storage_mode'] in [StorageMode.local.value, StorageMode.shared_mount.value]:
                        file_path = system_disk.find('source').attrib['file']
                        try:
                            os.remove(file_path)
                        except OSError:
                            # Best effort: a failed image removal does not fail the delete.
                            pass

                elif msg['action'] == 'reset_password':
                    if self.guest.setUserPassword(msg['user'], msg['password']) != 0:
                        raise RuntimeError('Guest reset password failure.')

                elif msg['action'] == 'attach_disk':

                    if 'xml' not in msg:
                        _log = u'添加磁盘缺少 xml 参数'
                        raise KeyError(_log)

                    flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
                    if self.guest.isActive():
                        flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE

                    # A successful attach returns 0 (compare with the exit
                    # status convention of Linux commands).
                    if self.guest.attachDeviceFlags(xml=msg['xml'], flags=flags) != 0:
                        raise RuntimeError('Attach disk failure.')

                    Guest.quota(guest=self.guest, msg=msg)

                elif msg['action'] == 'detach_disk':

                    if 'xml' not in msg:
                        _log = u'分离磁盘缺少 xml 参数'
                        raise KeyError(_log)

                    flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
                    if self.guest.isActive():
                        flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE

                    if self.guest.detachDeviceFlags(xml=msg['xml'], flags=flags) != 0:
                        raise RuntimeError('Detach disk failure.')

                elif msg['action'] == 'update_ssh_key':
                    if not self.guest.isActive():
                        _log = u'欲更新 SSH-KEY 的目标虚拟机未处于活动状态。'
                        logger.warning(_log)
                        log_emit.warn(_log)
                        continue

                    ret = Guest.update_ssh_key(guest=self.guest, msg=msg)

                    logger.info(json.dumps(ret, ensure_ascii=False))

                elif msg['action'] == 'allocate_bandwidth':
                    t = threading.Thread(target=Guest.allocate_bandwidth, args=(self.guest, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'adjust_ability':
                    t = threading.Thread(target=Guest.adjust_ability, args=(self.conn, self.guest, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'migrate':

                    # duri like qemu+ssh://destination_host/system
                    if 'duri' not in msg:
                        _log = u'迁移操作缺少 duri 参数'
                        raise KeyError(_log)

                    # https://rk4n.github.io/2016/08/10/qemu-post-copy-and-auto-converge-features/
                    flags = libvirt.VIR_MIGRATE_PERSIST_DEST | \
                        libvirt.VIR_MIGRATE_UNDEFINE_SOURCE | \
                        libvirt.VIR_MIGRATE_COMPRESSED | \
                        libvirt.VIR_MIGRATE_PEER2PEER | \
                        libvirt.VIR_MIGRATE_AUTO_CONVERGE

                    root = ET.fromstring(self.guest.XMLDesc())

                    if msg['storage_mode'] == StorageMode.local.value:
                        # The disk directory has to be added to the storage
                        # pools on both hosts, otherwise the migration reports:
                        # no storage pool with matching target path '/opt/Images'
                        flags |= libvirt.VIR_MIGRATE_NON_SHARED_DISK
                        flags |= libvirt.VIR_MIGRATE_LIVE

                        if not self.guest.isActive():
                            _log = u'非共享存储不支持离线迁移。'
                            logger.error(_log)
                            log_emit.error(_log)
                            raise RuntimeError('Nonsupport offline migrate with storage of non sharing mode.')

                        if self.init_ssh_client(hostname=msg['duri'].split('/')[2], user='******'):
                            # Pre-create an image of the same virtual size on
                            # the destination host for every disk of the guest.
                            for _disk in root.findall('devices/disk'):
                                _file_path = _disk.find('source').get('file')
                                disk_info = Disk.disk_info_by_local(image_path=_file_path)
                                disk_size = disk_info['virtual-size']
                                stdin, stdout, stderr = self.ssh_client.exec_command(
                                    ' '.join(['qemu-img', 'create', '-f', 'qcow2', _file_path, str(disk_size)]))

                                for line in stdout:
                                    logger.info(line)
                                    log_emit.info(line)

                                for line in stderr:
                                    logger.error(line)
                                    log_emit.error(line)

                    elif msg['storage_mode'] in [StorageMode.shared_mount.value, StorageMode.ceph.value,
                                                 StorageMode.glusterfs.value]:
                        if self.guest.isActive():
                            flags |= libvirt.VIR_MIGRATE_LIVE
                            flags |= libvirt.VIR_MIGRATE_TUNNELLED
                        else:
                            flags |= libvirt.VIR_MIGRATE_OFFLINE

                    if self.guest.migrateToURI(duri=msg['duri'], flags=flags) == 0:
                        # After a successful migration the local images are no longer needed.
                        if msg['storage_mode'] == StorageMode.local.value:
                            for _disk in root.findall('devices/disk'):
                                _file_path = _disk.find('source').get('file')
                                if _file_path is not None:
                                    os.remove(_file_path)

                    else:
                        # NOTE(review): this branch appears to pair with a
                        # non-zero migrateToURI result, yet the message talks
                        # about the storage mode -- confirm the intended pairing.
                        raise RuntimeError('Unknown storage mode.')

            elif msg['_object'] == 'disk':
                if msg['action'] == 'create':

                    if msg['storage_mode'] == StorageMode.glusterfs.value:
                        Guest.dfs_volume = msg['dfs_volume']
                        Guest.init_gfapi()

                        if not Disk.make_qemu_image_by_glusterfs(gf=Guest.gf, dfs_volume=msg['dfs_volume'],
                                                                 image_path=msg['image_path'], size=msg['size']):
                            raise RuntimeError('Create disk failure with glusterfs.')

                    elif msg['storage_mode'] in [StorageMode.local.value, StorageMode.shared_mount.value]:
                        if not Disk.make_qemu_image_by_local(image_path=msg['image_path'], size=msg['size']):
                            raise RuntimeError('Create disk failure with local storage mode.')

                elif msg['action'] == 'delete':

                    if msg['storage_mode'] == StorageMode.glusterfs.value:
                        Guest.dfs_volume = msg['dfs_volume']
                        Guest.init_gfapi()

                        if Disk.delete_qemu_image_by_glusterfs(gf=Guest.gf, image_path=msg['image_path']) \
                                is not None:
                            raise RuntimeError('Delete disk failure with glusterfs.')

                    elif msg['storage_mode'] in [StorageMode.local.value, StorageMode.shared_mount.value]:
                        if Disk.delete_qemu_image_by_local(image_path=msg['image_path']) is not None:
                            raise RuntimeError('Delete disk failure with local storage mode.')

                elif msg['action'] == 'resize':

                    if 'size' not in msg:
                        _log = u'添加磁盘缺少 disk 或 disk["size"] 参数'
                        raise KeyError(_log)

                    # A 36 character guest_uuid marks the disk as attached to a guest.
                    used = len(msg['guest_uuid']) == 36

                    if used:
                        self.refresh_guest_mapping()

                        if msg['guest_uuid'] not in self.guest_mapping_by_uuid:

                            if config['DEBUG']:
                                _log = u' '.join([u'uuid', msg['guest_uuid'], u'在计算节点', self.hostname,
                                                  u'中未找到.'])
                                logger.debug(_log)
                                log_emit.debug(_log)

                            raise RuntimeError('Resize disk failure, because the uuid ' + msg['guest_uuid'] +
                                               ' not found in current domains.')

                        self.guest = self.guest_mapping_by_uuid[msg['guest_uuid']]
                        if not isinstance(self.guest, libvirt.virDomain):
                            raise RuntimeError('Resize disk failure, because the guest is not a domain.')

                    # Online disk resize.
                    if used and self.guest.isActive():
                        if 'device_node' not in msg:
                            _log = u'添加磁盘缺少 disk 或 disk["device_node|size"] 参数'
                            raise KeyError(_log)

                        # The disk size is in KB by default; multiplying by
                        # 1024 twice scales it to GB.
                        msg['size'] = int(msg['size']) * 1024 * 1024

                        if self.guest.blockResize(disk=msg['device_node'], size=msg['size']) != 0:
                            raise RuntimeError('Online resize disk failure in blockResize method.')

                        Guest.quota(guest=self.guest, msg=msg)

                    # Offline disk resize.
                    else:
                        if not all(key in msg for key in ('storage_mode', 'dfs_volume', 'image_path')):
                            _log = u'添加磁盘缺少 disk 或 disk["storage_mode|dfs_volume|image_path|size"] 参数'
                            raise KeyError(_log)

                        if msg['storage_mode'] == StorageMode.glusterfs.value:
                            if not Disk.resize_qemu_image_by_glusterfs(dfs_volume=msg['dfs_volume'],
                                                                       image_path=msg['image_path'],
                                                                       size=msg['size']):
                                raise RuntimeError('Offline resize disk failure with glusterfs.')

                        elif msg['storage_mode'] in [StorageMode.local.value, StorageMode.shared_mount.value]:
                            if not Disk.resize_qemu_image_by_local(image_path=msg['image_path'], size=msg['size']):
                                raise RuntimeError('Offline resize disk failure with local storage mode.')

                elif msg['action'] == 'quota':
                    self.refresh_guest_mapping()
                    if msg['guest_uuid'] not in self.guest_mapping_by_uuid:

                        if config['DEBUG']:
                            _log = u' '.join([u'uuid', msg['guest_uuid'], u'在计算节点', self.hostname, u'中未找到.'])
                            logger.debug(_log)
                            log_emit.debug(_log)

                        raise RuntimeError('Disk quota failure, because the uuid ' + msg['guest_uuid'] +
                                           ' not found in current domains.')

                    self.guest = self.guest_mapping_by_uuid[msg['guest_uuid']]
                    if not isinstance(self.guest, libvirt.virDomain):
                        raise RuntimeError('Disk quota failure, because the guest is not a domain.')

                    if not self.guest.isActive():
                        _log = u'磁盘 ' + msg['uuid'] + u' 所属虚拟机未处于活动状态。'
                        logger.warning(_log)
                        log_emit.warn(_log)
                        continue

                    Guest.quota(guest=self.guest, msg=msg)

            elif msg['_object'] == 'snapshot':

                self.refresh_guest_mapping()
                if msg['uuid'] not in self.guest_mapping_by_uuid:

                    if config['DEBUG']:
                        _log = u' '.join([u'uuid', msg['uuid'], u'在计算节点', self.hostname, u'中未找到.'])
                        logger.debug(_log)
                        log_emit.debug(_log)

                    raise RuntimeError('Snapshot ' + msg['action'] + ' failure, because the uuid ' + msg['uuid'] +
                                       ' not found in current domains.')

                self.guest = self.guest_mapping_by_uuid[msg['uuid']]

                if not isinstance(self.guest, libvirt.virDomain):
                    raise RuntimeError('Snapshot ' + msg['action'] + ' failure, because the guest is not a domain.')

                if msg['action'] == 'create':
                    t = threading.Thread(target=Guest.create_snapshot, args=(self.guest, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'delete':
                    t = threading.Thread(target=Guest.delete_snapshot, args=(self.guest, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'revert':
                    t = threading.Thread(target=Guest.revert_snapshot, args=(self.guest, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'convert':
                    t = threading.Thread(target=Guest.convert_snapshot, args=(msg,))
                    t.setDaemon(False)
                    t.start()
                    continue

            elif msg['_object'] == 'os_template_image':
                if msg['action'] == 'delete':
                    if msg['storage_mode'] == StorageMode.glusterfs.value:
                        Guest.dfs_volume = msg['dfs_volume']
                        Guest.init_gfapi()

                        try:
                            Guest.gf.remove(msg['template_path'])
                        except OSError:
                            # Best effort: a failed template removal is ignored.
                            pass

                    elif msg['storage_mode'] in [StorageMode.local.value, StorageMode.shared_mount.value]:
                        try:
                            os.remove(msg['template_path'])
                        except OSError:
                            # Best effort: a failed template removal is ignored.
                            pass

            elif msg['_object'] == 'global':
                if msg['action'] == 'refresh_guest_state':
                    host_use_for_refresh_guest_state = Host()
                    t = threading.Thread(target=host_use_for_refresh_guest_state.refresh_guest_state, args=())
                    t.setDaemon(False)
                    t.start()
                    continue

            else:
                # NOTE(review): after logging, control still reaches the
                # success response below -- confirm this is intended.
                _log = u'未支持的 _object:' + msg['_object']
                logger.error(_log)
                log_emit.error(_log)

            response_emit.success(_object=msg['_object'], action=msg['action'], uuid=msg['uuid'],
                                  data=extend_data, passback_parameters=msg.get('passback_parameters'))

        except redis.exceptions.ConnectionError:
            logger.error(traceback.format_exc())
            # Throttle the loop so it does not flood the log while the redis
            # connection is down.
            time.sleep(5)

        except Exception:
            # Throttle the loop so it does not flood the log while the redis
            # connection is down.
            time.sleep(5)
            _trace = traceback.format_exc()
            logger.error(_trace)
            log_emit.error(_trace)

            # Only answer when an instruction was actually parsed; reading the
            # keys defensively keeps a second exception from escaping this
            # handler and terminating the engine thread.
            if isinstance(msg, dict):
                response_emit.failure(_object=msg.get('_object'), action=msg.get('action'),
                                      uuid=msg.get('uuid'),
                                      passback_parameters=msg.get('passback_parameters'))
def create(cls, conn, msg):
    """Create, define, initialise and start a guest described by ``msg``.

    The steps are: build the ``Guest`` from the instruction, queue a
    creating-guest record on ``q_creating_guest``, generate the system image,
    define the domain from its XML, run the OS template initialise operates,
    start the domain and apply its quota.  Progress events are emitted at 92
    and 97.  On success ``response_emit.success`` is called with the system
    disk info; any exception is logged and answered with
    ``response_emit.failure``.

    :param conn: connection used for ``define_by_xml``, ``start_by_uuid`` and
                 ``lookupByUUIDString``.
    :param msg: the instruction dict received from the instruction channel.
    """
    try:
        Guest.storage_mode = msg['storage_mode']

        vm = Guest(
            uuid=msg['uuid'],
            name=msg['name'],
            template_path=msg['template_path'],
            disk=msg['disks'][0],
            xml=msg['xml'],
        )

        if Guest.storage_mode == StorageMode.glusterfs.value:
            Guest.dfs_volume = msg['dfs_volume']
            Guest.init_gfapi()

        vm.system_image_path = vm.disk['path']

        creating_record = {
            'storage_mode': Guest.storage_mode,
            'dfs_volume': Guest.dfs_volume,
            'uuid': vm.uuid,
            'template_path': vm.template_path,
            'system_image_path': vm.system_image_path
        }
        q_creating_guest.put(creating_record)

        image_generated = vm.generate_system_image()
        if not image_generated:
            raise RuntimeError('System image generate failure.')

        defined = vm.define_by_xml(conn=conn)
        if not defined:
            raise RuntimeError('Define the instance of virtual machine by xml failure.')

        guest_event_emit.creating(uuid=vm.uuid, progress=92)

        if Guest.storage_mode == StorageMode.glusterfs.value:
            image_info = Disk.disk_info_by_glusterfs(dfs_volume=vm.dfs_volume,
                                                     image_path=vm.system_image_path)

        elif Guest.storage_mode in [StorageMode.local.value, StorageMode.shared_mount.value]:
            image_info = Disk.disk_info_by_local(image_path=vm.system_image_path)

        else:
            image_info = dict()

        # Exceptions raised here are handled by this thread's top-level
        # exception handler.
        domain = conn.lookupByUUIDString(uuidstr=vm.uuid)
        vm.execute_os_template_initialize_operates(
            guest=domain,
            os_template_initialize_operates=msg['os_template_initialize_operates'],
            os_type=msg['os_type'])

        guest_event_emit.creating(uuid=vm.uuid, progress=97)

        started = vm.start_by_uuid(conn=conn)
        if not started:
            raise RuntimeError('Start the instance of virtual machine by uuid failure.')

        running_domain = conn.lookupByUUIDString(uuidstr=vm.uuid)
        cls.quota(guest=running_domain, msg=msg)

        extend_data = {'disk_info': image_info}
        response_emit.success(_object=msg['_object'], action=msg['action'], uuid=msg['uuid'],
                              data=extend_data, passback_parameters=msg.get('passback_parameters'))

    except:
        trace = traceback.format_exc()
        logger.error(trace)
        log_emit.error(trace)
        response_emit.failure(_object=msg['_object'], action=msg.get('action'), uuid=msg.get('uuid'),
                              passback_parameters=msg.get('passback_parameters'))