class clientIF(object): """ The client interface of vdsm. Exposes vdsm verbs as xml-rpc functions. """ _instance = None _instanceLock = threading.Lock() def __init__(self, irs, log): """ Initialize the (single) clientIF instance :param irs: a Dispatcher object to be used as this object's irs. :type irs: :class:`storage.dispatcher.Dispatcher` :param log: a log object to be used for this object's logging. :type log: :class:`logging.Logger` """ self.vmContainerLock = threading.Lock() self._networkSemaphore = threading.Semaphore() self._shutdownSemaphore = threading.Semaphore() self.irs = irs if self.irs: self.irs.registerDomainStateChangeCallback(self.contEIOVms) self.log = log self._recovery = True self.channelListener = Listener(self.log) self._generationID = str(uuid.uuid4()) self.mom = None self.bindings = {} if _glusterEnabled: self.gluster = gapi.GlusterApi(self, log) else: self.gluster = None try: self.vmContainer = {} self._hostStats = sampling.HostStatsThread(log=log) self._hostStats.start() self.lastRemoteAccess = 0 self._enabled = True self._netConfigDirty = False self._prepareMOM() threading.Thread(target=self._recoverThread, name='clientIFinit').start() self.channelListener.settimeout( config.getint('vars', 'guest_agent_timeout')) self.channelListener.start() self.threadLocal = threading.local() self.threadLocal.client = '' host = config.get('addresses', 'management_ip') port = config.getint('addresses', 'management_port') self._createAcceptor(host, port) self._prepareXMLRPCBinding(port) self._prepareJSONRPCBinding() except: self.log.error('failed to init clientIF, ' 'shutting down storage dispatcher') if self.irs: self.irs.prepareForShutdown() if self.mom: self.mom.stop() raise @property def ready(self): return (self.irs is None or self.irs.ready) and not self._recovery def contEIOVms(self, sdUUID, isDomainStateValid): # This method is called everytime the onDomainStateChange # event is emitted, this event is emitted even when a domain goes # INVALID if this happens there is nothing to do if not isDomainStateValid: return libvirtCon = libvirtconnection.get() libvirtVms = libvirtCon.listAllDomains( libvirt.VIR_CONNECT_LIST_DOMAINS_PAUSED) with self.vmContainerLock: self.log.info("vmContainerLock acquired") for libvirtVm in libvirtVms: state = libvirtVm.state(0) if state[1] == libvirt.VIR_DOMAIN_PAUSED_IOERROR: vmId = libvirtVm.UUIDString() vmObj = self.vmContainer[vmId] if sdUUID in vmObj.sdIds: self.log.info("Cont vm %s in EIO", vmId) vmObj.cont() @classmethod def getInstance(cls, irs=None, log=None): with cls._instanceLock: if cls._instance is None: if log is None: raise Exception("Logging facility is required to create " "the single clientIF instance") else: cls._instance = clientIF(irs, log) return cls._instance def _createAcceptor(self, host, port): sslctx = self._createSSLContext() self._acceptor = MultiProtocolAcceptor(host, port, sslctx) def _createSSLContext(self): sslctx = None if config.getboolean('vars', 'ssl'): truststore_path = config.get('vars', 'trust_store_path') key_file = os.path.join(truststore_path, 'keys', 'vdsmkey.pem') cert_file = os.path.join(truststore_path, 'certs', 'vdsmcert.pem') ca_cert = os.path.join(truststore_path, 'certs', 'cacert.pem') sslctx = SSLContext(cert_file, key_file, ca_cert) return sslctx def _prepareXMLRPCBinding(self, port): if config.getboolean('vars', 'xmlrpc_enable'): try: from rpc.BindingXMLRPC import BindingXMLRPC from rpc.BindingXMLRPC import XmlDetector except ImportError: self.log.error('Unable to load the xmlrpc server 
module. ' 'Please make sure it is installed.') else: xml_binding = BindingXMLRPC(self, self.log, port) self.bindings['xmlrpc'] = xml_binding xml_detector = XmlDetector(xml_binding) self._acceptor.add_detector(xml_detector) def _prepareJSONRPCBinding(self): if config.getboolean('vars', 'jsonrpc_enable'): try: from rpc import Bridge from rpc.BindingJsonRpc import BindingJsonRpc from yajsonrpc.stompReactor import StompDetector except ImportError: self.log.warn('Unable to load the json rpc server module. ' 'Please make sure it is installed.') else: bridge = Bridge.DynamicBridge() json_binding = BindingJsonRpc(bridge) self.bindings['jsonrpc'] = json_binding stomp_detector = StompDetector(json_binding) self._acceptor.add_detector(stomp_detector) def _prepareMOM(self): momconf = config.get("mom", "conf") self.mom = MomThread(momconf) def prepareForShutdown(self): """ Prepare server for shutdown. Should be called before taking server down. """ if not self._shutdownSemaphore.acquire(blocking=False): self.log.debug('cannot run prepareForShutdown concurrently') return errCode['unavail'] try: if not self._enabled: self.log.debug('cannot run prepareForShutdown twice') return errCode['unavail'] self._acceptor.stop() for binding in self.bindings.values(): binding.stop() self._enabled = False self.channelListener.stop() self._hostStats.stop() if self.mom: self.mom.stop() if self.irs: return self.irs.prepareForShutdown() else: return {'status': doneCode} finally: self._shutdownSemaphore.release() def start(self): for binding in self.bindings.values(): binding.start() self.thread = threading.Thread(target=self._acceptor.serve_forever, name='Detector thread') self.thread.setDaemon(True) self.thread.start() def _getUUIDSpecPath(self, uuid): try: return blkid.getDeviceByUuid(uuid) except blkid.BlockIdException: self.log.info('Error finding path for device', exc_info=True) raise vm.VolumeError(uuid) def prepareVolumePath(self, drive, vmId=None): if type(drive) is dict: device = drive['device'] # PDIV drive format if device == 'disk' and isVdsmImage(drive): res = self.irs.prepareImage(drive['domainID'], drive['poolID'], drive['imageID'], drive['volumeID']) if res['status']['code']: raise vm.VolumeError(drive) volPath = res['path'] # The order of imgVolumesInfo is not guaranteed drive['volumeChain'] = res['imgVolumesInfo'] drive['volumeInfo'] = res['info'] # GUID drive format elif "GUID" in drive: res = self.irs.getDevicesVisibility([drive["GUID"]]) if not res["visible"][drive["GUID"]]: raise vm.VolumeError(drive) res = self.irs.appropriateDevice(drive["GUID"], vmId) if res['status']['code']: raise vm.VolumeError(drive) # Update size for LUN volume drive["truesize"] = res['truesize'] drive["apparentsize"] = res['apparentsize'] volPath = res['path'] # UUID drive format elif "UUID" in drive: volPath = self._getUUIDSpecPath(drive["UUID"]) # cdrom and floppy drives elif (device in ('cdrom', 'floppy') and 'specParams' in drive): params = drive['specParams'] if 'vmPayload' in params: volPath = self._prepareVolumePathFromPayload( vmId, device, params['vmPayload']) # next line can be removed in future, when < 3.3 engine # is not supported elif (params.get('path', '') == '' and drive.get('path', '') == ''): volPath = '' else: volPath = drive.get('path', '') elif "path" in drive: volPath = drive['path'] else: raise vm.VolumeError(drive) # For BC sake: None as argument elif not drive: volPath = drive # For BC sake: path as a string. 
elif os.path.exists(drive): volPath = drive else: raise vm.VolumeError(drive) self.log.info("prepared volume path: %s", volPath) return volPath def _prepareVolumePathFromPayload(self, vmId, device, payload): """ param vmId: VM UUID or None param device: either 'floppy' or 'cdrom' param payload: a dict formed like this: {'volId': 'volume id', # volId is optional 'file': {'filename': 'content', ...}} """ funcs = {'cdrom': 'mkIsoFs', 'floppy': 'mkFloppyFs'} if device not in funcs: raise vm.VolumeError("Unsupported 'device': %s" % device) func = getattr(supervdsm.getProxy(), funcs[device]) return func(vmId, payload['file'], payload.get('volId')) def teardownVolumePath(self, drive): res = {'status': doneCode} try: if isVdsmImage(drive): res = self.irs.teardownImage(drive['domainID'], drive['poolID'], drive['imageID']) except TypeError: # paths (strings) are not deactivated if not isinstance(drive, basestring): self.log.warning("Drive is not a vdsm image: %s", drive, exc_info=True) return res['status']['code'] def getDiskAlignment(self, drive): """ Returns the alignment of the disk partitions param drive: is either {"poolID": , "domainID": , "imageID": , "volumeID": } or {"GUID": } Return type: a dictionary with partition names as keys and True for aligned partitions and False for unaligned as values """ aligning = {} volPath = self.prepareVolumePath(drive) try: out = alignmentScan.scanImage(volPath) for line in out: aligning[line.partitionName] = line.alignmentScanResult finally: self.teardownVolumePath(drive) return {'status': doneCode, 'alignment': aligning} def createVm(self, vmParams, vmRecover=False): with self.vmContainerLock: self.log.info("vmContainerLock acquired by vm %s", vmParams['vmId']) try: if not vmRecover: if vmParams['vmId'] in self.vmContainer: self.log.warning('vm %s already exists' % vmParams['vmId']) return errCode['exist'] vm = Vm(self, vmParams, vmRecover) self.vmContainer[vmParams['vmId']] = vm finally: container_len = len(self.vmContainer) vm.run() self.log.debug("Total desktops after creation of %s is %d" % (vmParams['vmId'], container_len)) return {'status': doneCode, 'vmList': vm.status()} @utils.traceback() def _recoverThread(self): # Trying to run recover process until it works. During that time vdsm # stays in recovery mode (_recover=True), means all api requests # returns with "vdsm is in initializing process" message. utils.retry(self._recoverExistingVms, sleep=5) def _recoverExistingVms(self): try: # Starting up libvirt might take long when host under high load, # we prefer running this code in external thread to avoid blocking # API response. mog = min(config.getint('vars', 'max_outgoing_migrations'), caps.CpuTopology().cores()) migration.SourceThread.setMaxOutgoingMigrations(mog) # Recover for v in getVDSMDomains(): vmId = v.UUIDString() if not self._recoverVm(vmId): # RH qemu proc without recovery self.log.info( 'loose qemu process with id: ' '%s found, killing it.', vmId) try: v.destroy() except libvirt.libvirtError: self.log.error( 'failed to kill loose qemu ' 'process with id: %s', vmId, exc_info=True) # we do this to safely handle VMs which disappeared # from the host while VDSM was down/restarting recVms = self._getVDSMVmsFromRecovery() if recVms: self.log.warning( 'Found %i VMs from recovery files not' ' reported by libvirt.' ' This should not happen!' 
                    ' Will try to recover them.', len(recVms))
            for vmId in recVms:
                if not self._recoverVm(vmId):
                    self.log.warning(
                        'VM %s failed to recover from recovery'
                        ' file, reported as Down', vmId)

            while (self._enabled and
                   vmstatus.WAIT_FOR_LAUNCH in
                   [v.lastStatus for v in self.vmContainer.values()]):
                time.sleep(1)
            self._cleanOldFiles()
            self._recovery = False

            # Now if we have VMs to restore we should wait pool connection
            # and then prepare all volumes.
            # Actually, we need it just to get the resources for future
            # volumes manipulations
            while self._enabled and self.vmContainer and \
                    not self.irs.getConnectedStoragePoolsList()['poollist']:
                time.sleep(5)

            for vmId, vmObj in self.vmContainer.items():
                # Let's recover as much VMs as possible
                try:
                    # Do not prepare volumes when system goes down
                    if self._enabled:
                        vmObj.preparePaths(
                            vmObj.buildConfDevices()[vm.DISK_DEVICES])
                except:
                    self.log.error("Vm %s recovery failed",
                                   vmId, exc_info=True)
        except:
            self.log.error("Vm's recovery failed", exc_info=True)
            raise

    def _getVDSMVmsFromRecovery(self):
        vms = []
        for f in os.listdir(constants.P_VDSM_RUN):
            vmId, fileType = os.path.splitext(f)
            if fileType == ".recovery":
                if vmId not in self.vmContainer:
                    vms.append(vmId)
        return vms

    def _recoverVm(self, vmid):
        try:
            recoveryFile = constants.P_VDSM_RUN + vmid + ".recovery"
            params = pickle.load(file(recoveryFile))
            now = time.time()
            pt = float(params.pop('startTime', now))
            params['elapsedTimeOffset'] = now - pt
            self.log.debug("Trying to recover " + params['vmId'])
            if not self.createVm(params, vmRecover=True)['status']['code']:
                return recoveryFile
        except:
            self.log.debug("Error recovering VM", exc_info=True)
        return None

    def _cleanOldFiles(self):
        for f in os.listdir(constants.P_VDSM_RUN):
            try:
                vmId, fileType = f.split(".", 1)
                if fileType in ["guest.socket", "monitor.socket", "pid",
                                "stdio.dump", "recovery"]:
                    if vmId in self.vmContainer:
                        continue
                    if f == 'vdsmd.pid':
                        continue
                    if f == 'respawn.pid':
                        continue
                    if f == 'supervdsmd.pid':
                        continue
                    if f == 'supervdsm_respawn.pid':
                        continue
                else:
                    continue
                self.log.debug("removing old file " + f)
                utils.rmFile(constants.P_VDSM_RUN + f)
            except:
                pass

    def dispatchLibvirtEvents(self, conn, dom, *args):
        try:
            eventid = args[-1]
            vmid = dom.UUIDString()
            v = self.vmContainer.get(vmid)

            if not v:
                self.log.debug('unknown vm %s eventid %s args %s',
                               vmid, eventid, args)
                return

            if eventid == libvirt.VIR_DOMAIN_EVENT_ID_LIFECYCLE:
                event, detail = args[:-1]
                v._onLibvirtLifecycleEvent(event, detail, None)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_REBOOT:
                v.onReboot()
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_RTC_CHANGE:
                utcoffset, = args[:-1]
                v._rtcUpdate(utcoffset)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_IO_ERROR_REASON:
                srcPath, devAlias, action, reason = args[:-1]
                v._onIOError(devAlias, reason, action)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_GRAPHICS:
                phase, localAddr, remoteAddr, authScheme, subject = args[:-1]
                v.log.debug(
                    'graphics event phase '
                    '%s localAddr %s remoteAddr %s'
                    'authScheme %s subject %s',
                    phase, localAddr, remoteAddr, authScheme, subject)
                if phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE:
                    v.onConnect(remoteAddr['node'])
                elif phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT:
                    v.onDisconnect()
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_WATCHDOG:
                action, = args[:-1]
                v._onWatchdogEvent(action)
            else:
                v.log.warning('unknown eventid %s args %s', eventid, args)
        except:
            self.log.error("Error running VM callback", exc_info=True)
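
# Illustrative sketch, not part of the original module: one way a daemon
# entry point might bootstrap the clientIF variant above (the one built
# around MultiProtocolAcceptor).  The `dispatcher` argument and the logger
# name are assumptions made purely for illustration.
def _example_bootstrap(dispatcher):
    import logging
    import signal

    log = logging.getLogger('vds')
    # getInstance() enforces the singleton; a log object is mandatory on
    # first use, while the storage dispatcher may be None.
    cif = clientIF.getInstance(irs=dispatcher, log=log)
    cif.start()  # starts the bindings and the acceptor's detector thread
    try:
        signal.pause()  # block until the daemon is signalled to stop
    finally:
        cif.prepareForShutdown()
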
class clientIF: """ The client interface of vdsm. Exposes vdsm verbs as xml-rpc functions. """ _instance = None _instanceLock = threading.Lock() def __init__(self, irs, log): """ Initialize the (single) clientIF instance :param irs: a Dispatcher object to be used as this object's irs. :type irs: :class:`storage.dispatcher.Dispatcher` :param log: a log object to be used for this object's logging. :type log: :class:`logging.Logger` """ self.vmContainerLock = threading.Lock() self._networkSemaphore = threading.Semaphore() self._shutdownSemaphore = threading.Semaphore() self.irs = irs if self.irs: self.irs.registerDomainStateChangeCallback(self.contEIOVms) self.log = log self._recovery = True self.channelListener = Listener(self.log) self._generationID = str(uuid.uuid4()) self.mom = None if _glusterEnabled: self.gluster = gapi.GlusterApi(self, log) else: self.gluster = None try: self.vmContainer = {} self._hostStats = sampling.HostStatsThread(log=log) self._hostStats.start() self.lastRemoteAccess = 0 self._memLock = threading.Lock() self._enabled = True self._netConfigDirty = False self._prepareMOM() threading.Thread(target=self._recoverThread, name='clientIFinit').start() self.channelListener.settimeout( config.getint('vars', 'guest_agent_timeout')) self.channelListener.start() self.threadLocal = threading.local() self.threadLocal.client = '' except: self.log.error('failed to init clientIF, ' 'shutting down storage dispatcher') if self.irs: self.irs.prepareForShutdown() if self.mom: self.mom.stop() raise self._prepareBindings() @property def ready(self): return (self.irs is None or self.irs.ready) and not self._recovery def contEIOVms(self, sdUUID, isDomainStateValid): # This method is called everytime the onDomainStateChange # event is emitted, this event is emitted even when a domain goes # INVALID if this happens there is nothing to do if not isDomainStateValid: return libvirtCon = libvirtconnection.get() libvirtVms = libvirtCon.listAllDomains( libvirt.VIR_CONNECT_LIST_DOMAINS_PAUSED) with self.vmContainerLock: self.log.info("vmContainerLock acquired") for libvirtVm in libvirtVms: state = libvirtVm.state(0) if state[1] == libvirt.VIR_DOMAIN_PAUSED_IOERROR: vmId = libvirtVm.UUIDString() vmObj = self.vmContainer[vmId] if sdUUID in vmObj.sdIds: self.log.info("Cont vm %s in EIO", vmId) vmObj.cont() @classmethod def getInstance(cls, irs=None, log=None): with cls._instanceLock: if cls._instance is None: if log is None: raise Exception("Logging facility is required to create " "the single clientIF instance") else: cls._instance = clientIF(irs, log) return cls._instance def _loadBindingXMLRPC(self): from BindingXMLRPC import BindingXMLRPC ip = config.get('addresses', 'management_ip') xmlrpc_port = config.get('addresses', 'management_port') use_ssl = config.getboolean('vars', 'ssl') resp_timeout = config.getint('vars', 'vds_responsiveness_timeout') truststore_path = config.get('vars', 'trust_store_path') default_bridge = config.get("vars", "default_bridge") self.bindings['xmlrpc'] = BindingXMLRPC(self, self.log, ip, xmlrpc_port, use_ssl, resp_timeout, truststore_path, default_bridge) def _loadBindingJsonRpc(self): from BindingJsonRpc import BindingJsonRpc from Bridge import DynamicBridge ip = config.get('addresses', 'management_ip') port = config.getint('addresses', 'json_port') conf = [('tcp', {"ip": ip, "port": port})] self.bindings['json'] = BindingJsonRpc(DynamicBridge(), conf) def _prepareBindings(self): self.bindings = {} if config.getboolean('vars', 'xmlrpc_enable'): try: 
self._loadBindingXMLRPC() except ImportError: self.log.error('Unable to load the xmlrpc server module. ' 'Please make sure it is installed.') if config.getboolean('vars', 'jsonrpc_enable'): try: self._loadBindingJsonRpc() except ImportError: self.log.warn('Unable to load the json rpc server module. ' 'Please make sure it is installed.') def _prepareMOM(self): momconf = config.get("mom", "conf") if isMomAvailable(): try: self.mom = MomThread(momconf) return except: self.log.warn("MOM initialization failed and fall " "back to KsmMonitor", exc_info=True) else: self.log.warn("MOM is not available, fallback to KsmMonitor") self.ksmMonitor = ksm.KsmMonitorThread(self) def prepareForShutdown(self): """ Prepare server for shutdown. Should be called before taking server down. """ if not self._shutdownSemaphore.acquire(blocking=False): self.log.debug('cannot run prepareForShutdown concurrently') return errCode['unavail'] try: if not self._enabled: self.log.debug('cannot run prepareForShutdown twice') return errCode['unavail'] for binding in self.bindings.values(): binding.prepareForShutdown() self._enabled = False self.channelListener.stop() self._hostStats.stop() if self.mom: self.mom.stop() if self.irs: return self.irs.prepareForShutdown() else: return {'status': doneCode} finally: self._shutdownSemaphore.release() def serve(self): for binding in self.bindings.values(): binding.start() while self._enabled: time.sleep(3) def _getUUIDSpecPath(self, uuid): try: return blkid.getDeviceByUuid(uuid) except blkid.BlockIdException: self.log.info('Error finding path for device', exc_info=True) raise vm.VolumeError(uuid) def prepareVolumePath(self, drive, vmId=None): if type(drive) is dict: # PDIV drive format if drive['device'] == 'disk' and vm.isVdsmImage(drive): res = self.irs.prepareImage( drive['domainID'], drive['poolID'], drive['imageID'], drive['volumeID']) if res['status']['code']: raise vm.VolumeError(drive) volPath = res['path'] # The order of imgVolumesInfo is not guaranteed drive['volumeChain'] = res['imgVolumesInfo'] drive['volumeInfo'] = res['info'] # GUID drive format elif "GUID" in drive: res = self.irs.getDevicesVisibility([drive["GUID"]]) if not res["visible"][drive["GUID"]]: raise vm.VolumeError(drive) res = self.irs.appropriateDevice(drive["GUID"], vmId) if res['status']['code']: raise vm.VolumeError(drive) volPath = os.path.join("/dev/mapper", drive["GUID"]) # UUID drive format elif "UUID" in drive: volPath = self._getUUIDSpecPath(drive["UUID"]) # leave path == '' for empty cdrom and floppy drives ... elif (drive['device'] in ('cdrom', 'floppy') and 'specParams' in drive and # next line can be removed in future, when < 3.3 engine # is not supported drive['specParams'].get('path', '') == '' and drive.get('path', '') == '' and 'vmPayload' not in drive['specParams']): volPath = '' # ... 
or load the drive from vmPayload: elif drive['device'] in ('cdrom', 'floppy') and \ 'specParams' in drive and \ 'vmPayload' in drive['specParams']: ''' vmPayload is a key in specParams 'vmPayload': {'volId': 'volume id', # volId is optional 'file': {'filename': 'content', ...}} ''' mkFsNames = {'cdrom': 'mkIsoFs', 'floppy': 'mkFloppyFs'} try: mkFsFunction = getattr(supervdsm.getProxy(), mkFsNames[drive['device']]) except AttributeError: raise vm.VolumeError("Unsupported 'device': %s in " "drive: %" % (drive['device'], drive)) else: files = drive['specParams']['vmPayload']['file'] volId = drive['specParams']['vmPayload'].get('volId') volPath = mkFsFunction(vmId, files, volId) elif "path" in drive: volPath = drive['path'] else: raise vm.VolumeError(drive) # For BC sake: None as argument elif not drive: volPath = drive # For BC sake: path as a string. elif os.path.exists(drive): volPath = drive else: raise vm.VolumeError(drive) self.log.info("prepared volume path: %s", volPath) return volPath def teardownVolumePath(self, drive): res = {'status': doneCode} try: res = self.irs.teardownImage(drive['domainID'], drive['poolID'], drive['imageID']) except (KeyError, TypeError): # paths (strings) are not deactivated if not isinstance(drive, basestring): self.log.warning("Drive is not a vdsm image: %s", drive, exc_info=True) return res['status']['code'] def getDiskAlignment(self, drive): """ Returns the alignment of the disk partitions param drive: is either {"poolID": , "domainID": , "imageID": , "volumeID": } or {"GUID": } Return type: a dictionary with partition names as keys and True for aligned partitions and False for unaligned as values """ aligning = {} volPath = self.prepareVolumePath(drive) try: out = alignmentScan.scanImage(volPath) for line in out: aligning[line.partitionName] = line.alignmentScanResult finally: self.teardownVolumePath(drive) return {'status': doneCode, 'alignment': aligning} def createVm(self, vmParams, vmRecover=False): with self.vmContainerLock: self.log.info("vmContainerLock acquired by vm %s", vmParams['vmId']) try: if not vmRecover: if vmParams['vmId'] in self.vmContainer: self.log.warning('vm %s already exists' % vmParams['vmId']) return errCode['exist'] vm = Vm(self, vmParams, vmRecover) self.vmContainer[vmParams['vmId']] = vm finally: container_len = len(self.vmContainer) vm.run() self.log.debug("Total desktops after creation of %s is %d" % (vmParams['vmId'], container_len)) return {'status': doneCode, 'vmList': vm.status()} @utils.traceback() def _recoverThread(self): # Trying to run recover process until it works. During that time vdsm # stays in recovery mode (_recover=True), means all api requests # returns with "vdsm is in initializing process" message. utils.retry(self._recoverExistingVms, sleep=5) def _recoverExistingVms(self): try: # Starting up libvirt might take long when host under high load, # we prefer running this code in external thread to avoid blocking # API response. 
mog = min(config.getint('vars', 'max_outgoing_migrations'), caps.CpuTopology().cores()) vm.MigrationSourceThread.setMaxOutgoingMigrations(mog) vdsmVms = self._getVDSMVms() #Recover for v in vdsmVms: vmId = v.UUIDString() if not self._recoverVm(vmId): #RH qemu proc without recovery self.log.info('loose qemu process with id: ' '%s found, killing it.', vmId) try: v.destroy() except libvirt.libvirtError: self.log.error('failed to kill loose qemu ' 'process with id: %s', vmId, exc_info=True) while (self._enabled and 'WaitForLaunch' in [v.lastStatus for v in self.vmContainer.values()]): time.sleep(1) self._cleanOldFiles() self._recovery = False # Now if we have VMs to restore we should wait pool connection # and then prepare all volumes. # Actually, we need it just to get the resources for future # volumes manipulations while self._enabled and self.vmContainer and \ not self.irs.getConnectedStoragePoolsList()['poollist']: time.sleep(5) for vmId, vmObj in self.vmContainer.items(): # Let's recover as much VMs as possible try: # Do not prepare volumes when system goes down if self._enabled: vmObj.preparePaths( vmObj.buildConfDevices()[vm.DISK_DEVICES]) except: self.log.error("Vm %s recovery failed", vmId, exc_info=True) except: self.log.error("Vm's recovery failed", exc_info=True) raise def isVDSMVm(self, vm): """ Return True if vm seems as if it was created by vdsm. """ try: vmdom = minidom.parseString(vm.XMLDesc(0)) sysinfo = vmdom.getElementsByTagName("sysinfo")[0] except libvirt.libvirtError as e: if e.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN: self.log.error("domId: %s is dead", vm.UUIDString()) else: raise except IndexError: pass # no sysinfo in xml else: systype = sysinfo.getAttribute("type") if systype == "smbios": entries = sysinfo.getElementsByTagName("entry") for entry in entries: if entry.getAttribute("name") == "product": prod = entry.firstChild.data if prod in (caps.OSName.RHEL, caps.OSName.OVIRT, caps.OSName.RHEVH, caps.OSName.FEDORA, caps.OSName.DEBIAN): return True return False def _getVDSMVms(self): """ Return a list of vdsm created VM's. 
""" libvirtCon = libvirtconnection.get() domIds = libvirtCon.listDomainsID() vms = [] for domId in domIds: try: vm = libvirtCon.lookupByID(domId) except libvirt.libvirtError as e: if e.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN: self.log.error("domId: %s is dead", domId, exc_info=True) else: self.log.error("Can't look for domId: %s, code: %s", domId, e.get_error_code(), exc_info=True) raise else: vms.append(vm) return [vm for vm in vms if self.isVDSMVm(vm)] def _recoverVm(self, vmid): try: recoveryFile = constants.P_VDSM_RUN + vmid + ".recovery" params = pickle.load(file(recoveryFile)) now = time.time() pt = float(params.pop('startTime', now)) params['elapsedTimeOffset'] = now - pt self.log.debug("Trying to recover " + params['vmId']) if not self.createVm(params, vmRecover=True)['status']['code']: return recoveryFile except: self.log.debug("Error recovering VM", exc_info=True) return None def _cleanOldFiles(self): for f in os.listdir(constants.P_VDSM_RUN): try: vmId, fileType = f.split(".", 1) if fileType in ["guest.socket", "monitor.socket", "pid", "stdio.dump", "recovery"]: if vmId in self.vmContainer: continue if f == 'vdsmd.pid': continue if f == 'respawn.pid': continue if f == 'supervdsmd.pid': continue if f == 'supervdsm_respawn.pid': continue else: continue self.log.debug("removing old file " + f) utils.rmFile(constants.P_VDSM_RUN + f) except: pass def dispatchLibvirtEvents(self, conn, dom, *args): try: eventid = args[-1] vmid = dom.UUIDString() v = self.vmContainer.get(vmid) if not v: self.log.debug('unknown vm %s eventid %s args %s', vmid, eventid, args) return if eventid == libvirt.VIR_DOMAIN_EVENT_ID_LIFECYCLE: event, detail = args[:-1] v._onLibvirtLifecycleEvent(event, detail, None) elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_REBOOT: v.onReboot() elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_RTC_CHANGE: utcoffset, = args[:-1] v._rtcUpdate(utcoffset) elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_IO_ERROR_REASON: srcPath, devAlias, action, reason = args[:-1] v._onAbnormalStop(devAlias, reason) elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_GRAPHICS: phase, localAddr, remoteAddr, authScheme, subject = args[:-1] v.log.debug('graphics event phase ' '%s localAddr %s remoteAddr %s' 'authScheme %s subject %s', phase, localAddr, remoteAddr, authScheme, subject) if phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE: v.onConnect(remoteAddr['node']) elif phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT: v.onDisconnect() elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_BLOCK_JOB: path, type, status = args[:-1] v._onBlockJobEvent(path, type, status) elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_WATCHDOG: action, = args[:-1] v._onWatchdogEvent(action) else: v.log.warning('unknown eventid %s args %s', eventid, args) except: self.log.error("Error running VM callback", exc_info=True)
class clientIF: """ The client interface of vdsm. Exposes vdsm verbs as xml-rpc functions. """ _instance = None _instanceLock = threading.Lock() def __init__(self, log): """ Initialize the (single) clientIF instance :param log: a log object to be used for this object's logging. :type log: :class:`logging.Logger` """ self.vmContainerLock = threading.Lock() self._networkSemaphore = threading.Semaphore() self._shutdownSemaphore = threading.Semaphore() self.log = log self._recovery = True self.channelListener = Listener(self.log) self._generationID = str(uuid.uuid4()) self._initIRS() self.mom = None if _glusterEnabled: self.gluster = gapi.GlusterApi(self, log) else: self.gluster = None try: self.vmContainer = {} ifids = netinfo.nics() + netinfo.bondings() ifrates = map(netinfo.speed, ifids) self._hostStats = sampling.HostStatsThread(cif=self, log=log, ifids=ifids, ifrates=ifrates) self._hostStats.start() self.lastRemoteAccess = 0 self._memLock = threading.Lock() self._enabled = True self._netConfigDirty = False self._prepareMOM() threading.Thread(target=self._recoverExistingVms, name='clientIFinit').start() self.channelListener.settimeout( config.getint('vars', 'guest_agent_timeout')) self.channelListener.start() self.threadLocal = threading.local() self.threadLocal.client = '' except: self.log.error('failed to init clientIF, ' 'shutting down storage dispatcher') if self.irs: self.irs.prepareForShutdown() if self.mom: self.mom.stop() raise self._prepareBindings() def contEIOVms(self, sdUUID, isDomainStateValid): # This method is called everytime the onDomainStateChange # event is emitted, this event is emitted even when a domain goes # INVALID if this happens there is nothing to do if not isDomainStateValid: return libvirtCon = libvirtconnection.get() libvirtVms = libvirtCon.listAllDomains( libvirt.VIR_CONNECT_LIST_DOMAINS_PAUSED) with self.vmContainerLock: self.log.info("vmContainerLock acquired") for libvirtVm in libvirtVms: state = libvirtVm.state(0) if state[1] == libvirt.VIR_DOMAIN_PAUSED_IOERROR: vmId = libvirtVm.UUIDString() vmObj = self.vmContainer[vmId] if sdUUID in vmObj.sdIds: self.log.info("Cont vm %s in EIO", vmId) vmObj.cont() @classmethod def getInstance(cls, log=None): with cls._instanceLock: if cls._instance is None: if log is None: raise Exception("Logging facility is required to create " "the single clientIF instance") else: cls._instance = clientIF(log) return cls._instance def _loadBindingXMLRPC(self): from BindingXMLRPC import BindingXMLRPC ip = config.get('addresses', 'management_ip') xmlrpc_port = config.get('addresses', 'management_port') use_ssl = config.getboolean('vars', 'ssl') resp_timeout = config.getint('vars', 'vds_responsiveness_timeout') truststore_path = config.get('vars', 'trust_store_path') default_bridge = config.get("vars", "default_bridge") self.bindings['xmlrpc'] = BindingXMLRPC(self, self.log, ip, xmlrpc_port, use_ssl, resp_timeout, truststore_path, default_bridge) def _loadBindingJsonRpc(self): from BindingJsonRpc import BindingJsonRpc from Bridge import DynamicBridge ip = config.get('addresses', 'management_ip') port = config.getint('addresses', 'json_port') conf = [('tcp', {"ip": ip, "port": port})] self.bindings['json'] = BindingJsonRpc(DynamicBridge(), conf) def _prepareBindings(self): self.bindings = {} if config.getboolean('vars', 'xmlrpc_enable'): try: self._loadBindingXMLRPC() except ImportError: self.log.error('Unable to load the xmlrpc server module. 
' 'Please make sure it is installed.') if config.getboolean('vars', 'jsonrpc_enable'): try: self._loadBindingJsonRpc() except ImportError: self.log.warn('Unable to load the json rpc server module. ' 'Please make sure it is installed.') def _prepareMOM(self): momconf = config.get("mom", "conf") if isMomAvailable(): try: self.mom = MomThread(momconf) return except: self.log.warn( "MOM initialization failed and fall " "back to KsmMonitor", exc_info=True) else: self.log.warn("MOM is not available, fallback to KsmMonitor") self.ksmMonitor = ksm.KsmMonitorThread(self) def _syncLibvirtNetworks(self): """ function is mostly for upgrade from versions that did not have a libvirt network per vdsm network """ # add libvirt networks nets = netinfo.networks() bridges = netinfo.bridges() configWriter = ifcfg.ConfigWriter() for bridge in bridges: if not bridge in nets: configWriter.createLibvirtNetwork(network=bridge, bridged=True, skipBackup=True) # remove bridged networks that their bridge not exists #TODO: # this should probably go into vdsm-restore-net script for network in nets: if nets[network]['bridged'] and network not in bridges: configWriter.removeLibvirtNetwork(network, skipBackup=True) def prepareForShutdown(self): """ Prepare server for shutdown. Should be called before taking server down. """ if not self._shutdownSemaphore.acquire(blocking=False): self.log.debug('cannot run prepareForShutdown concurrently') return errCode['unavail'] try: if not self._enabled: self.log.debug('cannot run prepareForShutdown twice') return errCode['unavail'] for binding in self.bindings.values(): binding.prepareForShutdown() self._enabled = False self.channelListener.stop() self._hostStats.stop() if self.mom: self.mom.stop() if self.irs: return self.irs.prepareForShutdown() else: return {'status': doneCode} finally: self._shutdownSemaphore.release() def serve(self): for binding in self.bindings.values(): binding.start() while self._enabled: time.sleep(3) def _initIRS(self): self.irs = None if config.getboolean('irs', 'irs_enable'): try: self.irs = Dispatcher(HSM()) except: self.log.error("Error initializing IRS", exc_info=True) else: self.irs.registerDomainStateChangeCallback(self.contEIOVms) def _getUUIDSpecPath(self, uuid): try: return blkid.getDeviceByUuid(uuid) except blkid.BlockIdException: self.log.info('Error finding path for device', exc_info=True) raise vm.VolumeError(uuid) def prepareVolumePath(self, drive, vmId=None): if type(drive) is dict: # PDIV drive format if drive['device'] == 'disk' and vm.isVdsmImage(drive): res = self.irs.prepareImage(drive['domainID'], drive['poolID'], drive['imageID'], drive['volumeID']) if res['status']['code']: raise vm.VolumeError(drive) volPath = res['path'] drive['volumeChain'] = res['chain'] drive['volumeInfo'] = res['info'] # GUID drive format elif "GUID" in drive: visible = self.irs.scanDevicesVisibility([drive["GUID"]]) if visible[drive["GUID"]] is False: self.log.error("GUID: %s is not visible", drive["GUID"]) raise vm.VolumeError(drive) volPath = os.path.join("/dev/mapper", drive["GUID"]) res = self.irs.appropriateDevice(drive["GUID"], vmId) if res['status']['code']: self.log.error("Change ownership on device %s failed", drive["GUID"]) raise vm.VolumeError(drive) # UUID drive format elif "UUID" in drive: volPath = self._getUUIDSpecPath(drive["UUID"]) # leave path == '' for empty cdrom and floppy drives ... 
elif drive['device'] in ('cdrom', 'floppy') and \ 'specParams' in drive and \ 'path' in drive['specParams'] and \ drive['specParams']['path'] == '': volPath = '' # ... or load the drive from vmPayload: elif drive['device'] in ('cdrom', 'floppy') and \ 'specParams' in drive and \ 'vmPayload' in drive['specParams']: ''' vmPayload is a key in specParams 'vmPayload': {'volId': 'volume id', # volId is optional 'file': {'filename': 'content', ...}} ''' mkFsNames = {'cdrom': 'mkIsoFs', 'floppy': 'mkFloppyFs'} try: mkFsFunction = getattr(supervdsm.getProxy(), mkFsNames[drive['device']]) except AttributeError: raise vm.VolumeError("Unsupported 'device': %s in " "drive: %" % (drive['device'], drive)) else: files = drive['specParams']['vmPayload']['file'] volId = drive['specParams']['vmPayload'].get('volId') volPath = mkFsFunction(vmId, files, volId) elif "path" in drive: volPath = drive['path'] else: raise vm.VolumeError(drive) # For BC sake: None as argument elif not drive: volPath = drive # For BC sake: path as a string. elif os.path.exists(drive): volPath = drive else: raise vm.VolumeError(drive) self.log.info("prepared volume path: %s", volPath) return volPath def teardownVolumePath(self, drive): res = {'status': doneCode} try: res = self.irs.teardownImage(drive['domainID'], drive['poolID'], drive['imageID']) except (KeyError, TypeError): # paths (strings) are not deactivated if not isinstance(drive, basestring): self.log.warning("Drive is not a vdsm image: %s", drive, exc_info=True) return res['status']['code'] def getDiskAlignment(self, drive): """ Returns the alignment of the disk partitions param drive: is either {"poolID": , "domainID": , "imageID": , "volumeID": } or {"GUID": } Return type: a dictionary with partition names as keys and True for aligned partitions and False for unaligned as values """ aligning = {} volPath = self.prepareVolumePath(drive) try: out = alignmentScan.scanImage(volPath) for line in out: aligning[line.partitionName] = line.alignmentScanResult finally: self.teardownVolumePath(drive) return {'status': 0, 'alignment': aligning} def createVm(self, vmParams): with self.vmContainerLock: self.log.info("vmContainerLock acquired by vm %s", vmParams['vmId']) try: if 'recover' not in vmParams: if vmParams['vmId'] in self.vmContainer: self.log.warning('vm %s already exists' % vmParams['vmId']) return errCode['exist'] vm = Vm(self, vmParams) self.vmContainer[vmParams['vmId']] = vm finally: container_len = len(self.vmContainer) vm.run() self.log.debug("Total desktops after creation of %s is %d" % (vmParams['vmId'], container_len)) return {'status': doneCode, 'vmList': vm.status()} def _initializingLibvirt(self): self._syncLibvirtNetworks() mog = min(config.getint('vars', 'max_outgoing_migrations'), caps.CpuTopology().cores()) vm.MigrationSourceThread.setMaxOutgoingMigrations(mog) def _recoverExistingVms(self): # Starting up libvirt might take long when host under high load, # we prefer running this code in external thread to avoid blocking # API response. 
self._initializingLibvirt() try: vdsmVms = self._getVDSMVms() #Recover for v in vdsmVms: vmId = v.UUIDString() if not self._recoverVm(vmId): #RH qemu proc without recovery self.log.info( 'loose qemu process with id: ' '%s found, killing it.', vmId) try: v.destroy() except libvirt.libvirtError: self.log.error( 'failed to kill loose qemu ' 'process with id: %s', vmId, exc_info=True) while (self._enabled and 'WaitForLaunch' in [v.lastStatus for v in self.vmContainer.values()]): time.sleep(1) self._cleanOldFiles() self._recovery = False # Now if we have VMs to restore we should wait pool connection # and then prepare all volumes. # Actually, we need it just to get the resources for future # volumes manipulations while self._enabled and self.vmContainer and \ not self.irs.getConnectedStoragePoolsList()['poollist']: time.sleep(5) for vmId, vmObj in self.vmContainer.items(): # Let's recover as much VMs as possible try: # Do not prepare volumes when system goes down if self._enabled: vmObj.preparePaths( vmObj.getConfDevices()[vm.DISK_DEVICES]) except: self.log.error("Vm %s recovery failed", vmId, exc_info=True) except: self.log.error("Vm's recovery failed", exc_info=True) def isVDSMVm(self, vm): """ Return True if vm seems as if it was created by vdsm. """ try: vmdom = minidom.parseString(vm.XMLDesc(0)) sysinfo = vmdom.getElementsByTagName("sysinfo")[0] except libvirt.libvirtError as e: if e.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN: self.log.error("domId: %s is dead", vm.UUIDString()) else: raise except IndexError: pass # no sysinfo in xml else: systype = sysinfo.getAttribute("type") if systype == "smbios": entries = sysinfo.getElementsByTagName("entry") for entry in entries: if entry.getAttribute("name") == "product": prod = entry.firstChild.data if prod in (caps.OSName.RHEL, caps.OSName.OVIRT, caps.OSName.RHEVH, caps.OSName.FEDORA, caps.OSName.DEBIAN): return True return False def _getVDSMVms(self): """ Return a list of vdsm created VM's. 
""" libvirtCon = libvirtconnection.get() domIds = libvirtCon.listDomainsID() vms = [] for domId in domIds: try: vm = libvirtCon.lookupByID(domId) except libvirt.libvirtError as e: if e.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN: self.log.error("domId: %s is dead", domId, exc_info=True) else: self.log.error("Can't look for domId: %s, code: %s", domId, e.get_error_code(), exc_info=True) raise else: vms.append(vm) return [vm for vm in vms if self.isVDSMVm(vm)] def _recoverVm(self, vmid): try: recoveryFile = constants.P_VDSM_RUN + vmid + ".recovery" params = pickle.load(file(recoveryFile)) params['recover'] = True now = time.time() pt = float(params.pop('startTime', now)) params['elapsedTimeOffset'] = now - pt self.log.debug("Trying to recover " + params['vmId']) if not self.createVm(params)['status']['code']: return recoveryFile except: self.log.debug("Error recovering VM", exc_info=True) return None def _cleanOldFiles(self): for f in os.listdir(constants.P_VDSM_RUN): try: vmId, fileType = f.split(".", 1) if fileType in [ "guest.socket", "monitor.socket", "pid", "stdio.dump", "recovery" ]: if vmId in self.vmContainer: continue if f == 'vdsmd.pid': continue if f == 'respawn.pid': continue if f == 'supervdsmd.pid': continue if f == 'supervdsm_respawn.pid': continue else: continue self.log.debug("removing old file " + f) utils.rmFile(constants.P_VDSM_RUN + f) except: pass def dispatchLibvirtEvents(self, conn, dom, *args): try: eventid = args[-1] vmid = dom.UUIDString() v = self.vmContainer.get(vmid) if not v: self.log.debug('unknown vm %s eventid %s args %s', vmid, eventid, args) return if eventid == libvirt.VIR_DOMAIN_EVENT_ID_LIFECYCLE: event, detail = args[:-1] v._onLibvirtLifecycleEvent(event, detail, None) elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_REBOOT: v.onReboot() elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_RTC_CHANGE: utcoffset, = args[:-1] v._rtcUpdate(utcoffset) elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_IO_ERROR_REASON: srcPath, devAlias, action, reason = args[:-1] v._onAbnormalStop(devAlias, reason) elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_GRAPHICS: phase, localAddr, remoteAddr, authScheme, subject = args[:-1] v.log.debug( 'graphics event phase ' '%s localAddr %s remoteAddr %s' 'authScheme %s subject %s', phase, localAddr, remoteAddr, authScheme, subject) if phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE: v.onConnect(remoteAddr['node']) elif phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT: v.onDisconnect() elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_BLOCK_JOB: path, type, status = args[:-1] v._onBlockJobEvent(path, type, status) elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_WATCHDOG: action, = args[:-1] v._onWatchdogEvent(action) else: v.log.warning('unknown eventid %s args %s', eventid, args) except: self.log.error("Error running VM callback", exc_info=True)
class clientIF(object): """ The client interface of vdsm. Exposes vdsm verbs as json-rpc or xml-rpc functions. """ _instance = None _instanceLock = threading.Lock() def __init__(self, irs, log): """ Initialize the (single) clientIF instance :param irs: a Dispatcher object to be used as this object's irs. :type irs: :class:`storage.dispatcher.Dispatcher` :param log: a log object to be used for this object's logging. :type log: :class:`logging.Logger` """ self.vmContainerLock = threading.Lock() self._networkSemaphore = threading.Semaphore() self._shutdownSemaphore = threading.Semaphore() self.irs = irs if self.irs: self._contEIOVmsCB = partial(clientIF.contEIOVms, proxy(self)) self.irs.registerDomainStateChangeCallback(self._contEIOVmsCB) self.log = log self._recovery = True self.channelListener = Listener(self.log) self._generationID = str(uuid.uuid4()) self.mom = None self.bindings = {} if _glusterEnabled: self.gluster = gapi.GlusterApi(self, log) else: self.gluster = None try: self.vmContainer = {} self._hostStats = sampling.HostStatsThread(log=log) self._hostStats.start() self.lastRemoteAccess = 0 self._enabled = True self._netConfigDirty = False self._prepareMOM() threading.Thread(target=self._recoverThread, name='clientIFinit').start() self.channelListener.settimeout( config.getint('vars', 'guest_agent_timeout')) self.channelListener.start() self.threadLocal = threading.local() self.threadLocal.client = '' host = config.get('addresses', 'management_ip') port = config.getint('addresses', 'management_port') self._createAcceptor(host, port) self._prepareXMLRPCBinding() self._prepareJSONRPCBinding() except: self.log.error('failed to init clientIF, ' 'shutting down storage dispatcher') if self.irs: self.irs.prepareForShutdown() if self.mom: self.mom.stop() raise def getVMs(self): """ Get a snapshot of the currently registered VMs. 
Return value will be a dict of {vmUUID: VM_object} """ with self.vmContainerLock: return self.vmContainer.copy() @property def ready(self): return (self.irs is None or self.irs.ready) and not self._recovery def contEIOVms(self, sdUUID, isDomainStateValid): # This method is called everytime the onDomainStateChange # event is emitted, this event is emitted even when a domain goes # INVALID if this happens there is nothing to do if not isDomainStateValid: return libvirtCon = libvirtconnection.get() libvirtVms = libvirtCon.listAllDomains( libvirt.VIR_CONNECT_LIST_DOMAINS_PAUSED) with self.vmContainerLock: self.log.info("vmContainerLock acquired") for libvirtVm in libvirtVms: state = libvirtVm.state(0) if state[1] == libvirt.VIR_DOMAIN_PAUSED_IOERROR: vmId = libvirtVm.UUIDString() vmObj = self.vmContainer[vmId] if sdUUID in vmObj.sdIds: self.log.info("Cont vm %s in EIO", vmId) vmObj.cont() @classmethod def getInstance(cls, irs=None, log=None): with cls._instanceLock: if cls._instance is None: if log is None: raise Exception("Logging facility is required to create " "the single clientIF instance") else: cls._instance = clientIF(irs, log) return cls._instance def _createAcceptor(self, host, port): sslctx = self._createSSLContext() self._acceptor = MultiProtocolAcceptor(host, port, sslctx) def _createSSLContext(self): sslctx = None if config.getboolean('vars', 'ssl'): truststore_path = config.get('vars', 'trust_store_path') key_file = os.path.join(truststore_path, 'keys', 'vdsmkey.pem') cert_file = os.path.join(truststore_path, 'certs', 'vdsmcert.pem') ca_cert = os.path.join(truststore_path, 'certs', 'cacert.pem') protocol = config.get('vars', 'ssl_protocol') sslctx = SSLContext(cert_file, key_file, ca_cert=ca_cert, protocol=protocol) return sslctx def _prepareXMLRPCBinding(self): if config.getboolean('vars', 'xmlrpc_enable'): try: from rpc.bindingxmlrpc import BindingXMLRPC from rpc.bindingxmlrpc import XmlDetector except ImportError: self.log.error('Unable to load the xmlrpc server module. ' 'Please make sure it is installed.') else: xml_binding = BindingXMLRPC(self, self.log) self.bindings['xmlrpc'] = xml_binding xml_detector = XmlDetector(xml_binding) self._acceptor.add_detector(xml_detector) def _prepareJSONRPCBinding(self): if config.getboolean('vars', 'jsonrpc_enable'): try: from rpc import Bridge from rpc.bindingjsonrpc import BindingJsonRpc from yajsonrpc.stompreactor import StompDetector except ImportError: self.log.warn('Unable to load the json rpc server module. ' 'Please make sure it is installed.') else: bridge = Bridge.DynamicBridge() json_binding = BindingJsonRpc(bridge) self.bindings['jsonrpc'] = json_binding stomp_detector = StompDetector(json_binding) self._acceptor.add_detector(stomp_detector) def _prepareMOM(self): momconf = config.get("mom", "conf") self.mom = MomThread(momconf) def prepareForShutdown(self): """ Prepare server for shutdown. Should be called before taking server down. 
""" if not self._shutdownSemaphore.acquire(blocking=False): self.log.debug('cannot run prepareForShutdown concurrently') return errCode['unavail'] try: if not self._enabled: self.log.debug('cannot run prepareForShutdown twice') return errCode['unavail'] self._acceptor.stop() for binding in self.bindings.values(): binding.stop() self._enabled = False self.channelListener.stop() self._hostStats.stop() if self.mom: self.mom.stop() if self.irs: return self.irs.prepareForShutdown() else: return {'status': doneCode} finally: self._shutdownSemaphore.release() def start(self): for binding in self.bindings.values(): binding.start() self.thread = threading.Thread(target=self._acceptor.serve_forever, name='Detector thread') self.thread.setDaemon(True) self.thread.start() def _getUUIDSpecPath(self, uuid): try: return blkid.getDeviceByUuid(uuid) except blkid.BlockIdException: self.log.info('Error finding path for device', exc_info=True) raise vm.VolumeError(uuid) def prepareVolumePath(self, drive, vmId=None): if type(drive) is dict: device = drive['device'] # PDIV drive format if device == 'disk' and isVdsmImage(drive): res = self.irs.prepareImage( drive['domainID'], drive['poolID'], drive['imageID'], drive['volumeID']) if res['status']['code']: raise vm.VolumeError(drive) volPath = res['path'] # The order of imgVolumesInfo is not guaranteed drive['volumeChain'] = res['imgVolumesInfo'] drive['volumeInfo'] = res['info'] # GUID drive format elif "GUID" in drive: res = self.irs.getDevicesVisibility([drive["GUID"]]) if not res["visible"][drive["GUID"]]: raise vm.VolumeError(drive) res = self.irs.appropriateDevice(drive["GUID"], vmId) if res['status']['code']: raise vm.VolumeError(drive) # Update size for LUN volume drive["truesize"] = res['truesize'] drive["apparentsize"] = res['apparentsize'] volPath = res['path'] # UUID drive format elif "UUID" in drive: volPath = self._getUUIDSpecPath(drive["UUID"]) # cdrom and floppy drives elif (device in ('cdrom', 'floppy') and 'specParams' in drive): params = drive['specParams'] if 'vmPayload' in params: volPath = self._prepareVolumePathFromPayload( vmId, device, params['vmPayload']) # next line can be removed in future, when < 3.3 engine # is not supported elif (params.get('path', '') == '' and drive.get('path', '') == ''): volPath = '' else: volPath = drive.get('path', '') elif "path" in drive: volPath = drive['path'] else: raise vm.VolumeError(drive) # For BC sake: None as argument elif not drive: volPath = drive # For BC sake: path as a string. 
elif os.path.exists(drive): volPath = drive else: raise vm.VolumeError(drive) self.log.info("prepared volume path: %s", volPath) return volPath def _prepareVolumePathFromPayload(self, vmId, device, payload): """ param vmId: VM UUID or None param device: either 'floppy' or 'cdrom' param payload: a dict formed like this: {'volId': 'volume id', # volId is optional 'file': {'filename': 'content', ...}} """ funcs = {'cdrom': 'mkIsoFs', 'floppy': 'mkFloppyFs'} if device not in funcs: raise vm.VolumeError("Unsupported 'device': %s" % device) func = getattr(supervdsm.getProxy(), funcs[device]) return func(vmId, payload['file'], payload.get('volId')) def teardownVolumePath(self, drive): res = {'status': doneCode} try: if isVdsmImage(drive): res = self.irs.teardownImage(drive['domainID'], drive['poolID'], drive['imageID']) except TypeError: # paths (strings) are not deactivated if not isinstance(drive, basestring): self.log.warning("Drive is not a vdsm image: %s", drive, exc_info=True) return res['status']['code'] def getDiskAlignment(self, drive): """ Returns the alignment of the disk partitions param drive: is either {"poolID": , "domainID": , "imageID": , "volumeID": } or {"GUID": } Return type: a dictionary with partition names as keys and True for aligned partitions and False for unaligned as values """ aligning = {} volPath = self.prepareVolumePath(drive) try: out = alignmentScan.scanImage(volPath) for line in out: aligning[line.partitionName] = line.alignmentScanResult finally: self.teardownVolumePath(drive) return {'status': doneCode, 'alignment': aligning} def createVm(self, vmParams, vmRecover=False): with self.vmContainerLock: if not vmRecover: if vmParams['vmId'] in self.vmContainer: return errCode['exist'] vm = Vm(self, vmParams, vmRecover) self.vmContainer[vmParams['vmId']] = vm vm.run() return {'status': doneCode, 'vmList': vm.status()} def getAllVmStats(self): return [v.getStats() for v in self.vmContainer.values()] @utils.traceback() def _recoverThread(self): # Trying to run recover process until it works. During that time vdsm # stays in recovery mode (_recover=True), means all api requests # returns with "vdsm is in initializing process" message. utils.retry(self._recoverExistingVms, sleep=5) def _recoverExistingVms(self): try: # Starting up libvirt might take long when host under high load, # we prefer running this code in external thread to avoid blocking # API response. mog = min(config.getint('vars', 'max_outgoing_migrations'), caps.CpuTopology().cores()) migration.SourceThread.setMaxOutgoingMigrations(mog) # Recover for v in getVDSMDomains(): vmId = v.UUIDString() if not self._recoverVm(vmId): # RH qemu proc without recovery self.log.info('loose qemu process with id: ' '%s found, killing it.', vmId) try: v.destroy() except libvirt.libvirtError: self.log.error('failed to kill loose qemu ' 'process with id: %s', vmId, exc_info=True) # we do this to safely handle VMs which disappeared # from the host while VDSM was down/restarting recVms = self._getVDSMVmsFromRecovery() if recVms: self.log.warning('Found %i VMs from recovery files not' ' reported by libvirt.' ' This should not happen!' 
' Will try to recover them.', len(recVms)) for vmId in recVms: if not self._recoverVm(vmId): self.log.warning('VM %s failed to recover from recovery' ' file, reported as Down', vmId) while (self._enabled and vmstatus.WAIT_FOR_LAUNCH in [v.lastStatus for v in self.vmContainer.values()]): time.sleep(1) self._cleanOldFiles() self._recovery = False # Now if we have VMs to restore we should wait pool connection # and then prepare all volumes. # Actually, we need it just to get the resources for future # volumes manipulations while self._enabled and self.vmContainer and \ not self.irs.getConnectedStoragePoolsList()['poollist']: time.sleep(5) for vmId, vmObj in self.vmContainer.items(): # Let's recover as much VMs as possible try: # Do not prepare volumes when system goes down if self._enabled: vmObj.preparePaths( vmObj.devSpecMapFromConf()[hwclass.DISK]) except: self.log.error("Vm %s recovery failed", vmId, exc_info=True) except: self.log.error("Vm's recovery failed", exc_info=True) raise def _getVDSMVmsFromRecovery(self): vms = [] for f in os.listdir(constants.P_VDSM_RUN): vmId, fileType = os.path.splitext(f) if fileType == ".recovery": if vmId not in self.vmContainer: vms.append(vmId) return vms def _recoverVm(self, vmid): try: recoveryFile = constants.P_VDSM_RUN + vmid + ".recovery" params = pickle.load(file(recoveryFile)) now = time.time() pt = float(params.pop('startTime', now)) params['elapsedTimeOffset'] = now - pt self.log.debug("Trying to recover " + params['vmId']) if not self.createVm(params, vmRecover=True)['status']['code']: return recoveryFile except: self.log.debug("Error recovering VM", exc_info=True) return None def _cleanOldFiles(self): for f in os.listdir(constants.P_VDSM_RUN): try: vmId, fileType = f.split(".", 1) if fileType in ["guest.socket", "monitor.socket", "pid", "stdio.dump", "recovery"]: if vmId in self.vmContainer: continue if f == 'vdsmd.pid': continue if f == 'respawn.pid': continue if f == 'supervdsmd.pid': continue if f == 'supervdsm_respawn.pid': continue else: continue self.log.debug("removing old file " + f) utils.rmFile(constants.P_VDSM_RUN + f) except: pass def dispatchLibvirtEvents(self, conn, dom, *args): try: eventid = args[-1] vmid = dom.UUIDString() v = self.vmContainer.get(vmid) if not v: self.log.debug('unknown vm %s eventid %s args %s', vmid, eventid, args) return if eventid == libvirt.VIR_DOMAIN_EVENT_ID_LIFECYCLE: event, detail = args[:-1] v._onLibvirtLifecycleEvent(event, detail, None) elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_REBOOT: v.onReboot() elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_RTC_CHANGE: utcoffset, = args[:-1] v._rtcUpdate(utcoffset) elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_IO_ERROR_REASON: srcPath, devAlias, action, reason = args[:-1] v._onIOError(devAlias, reason, action) elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_GRAPHICS: phase, localAddr, remoteAddr, authScheme, subject = args[:-1] v.log.debug('graphics event phase ' '%s localAddr %s remoteAddr %s' 'authScheme %s subject %s', phase, localAddr, remoteAddr, authScheme, subject) if phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE: v.onConnect(remoteAddr['node']) elif phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT: v.onDisconnect() elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_WATCHDOG: action, = args[:-1] v._onWatchdogEvent(action) else: v.log.warning('unknown eventid %s args %s', eventid, args) except: self.log.error("Error running VM callback", exc_info=True)
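
# Illustrative sketch, not part of the original module: _recoverVm() above
# expects a pickled dict of VM creation parameters saved as
# "<vmId>.recovery" under P_VDSM_RUN.  The parameter set shown here is a
# made-up minimal subset; real recovery files carry the full VM
# configuration.
def _example_write_recovery_file(vmParams):
    import pickle
    import time

    vmParams = dict(vmParams)
    vmParams.setdefault('startTime', time.time())
    recoveryFile = constants.P_VDSM_RUN + vmParams['vmId'] + '.recovery'
    with open(recoveryFile, 'wb') as f:
        pickle.dump(vmParams, f)
    # On the next vdsm start, _recoverVm(vmParams['vmId']) re-creates the VM
    # with vmRecover=True and derives 'elapsedTimeOffset' from 'startTime'.
    return recoveryFile
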
class clientIF: """ The client interface of vdsm. Exposes vdsm verbs as xml-rpc functions. """ _instance = None _instanceLock = threading.Lock() def __init__(self, log): """ Initialize the (single) clientIF instance :param log: a log object to be used for this object's logging. :type log: :class:`logging.Logger` """ self.vmContainerLock = threading.Lock() self._networkSemaphore = threading.Semaphore() self._shutdownSemaphore = threading.Semaphore() self.log = log self._recovery = True self.channelListener = Listener(self.log) self._generationID = str(uuid.uuid4()) self._initIRS() self.mom = None if _glusterEnabled: self.gluster = gapi.GlusterApi(self, log) else: self.gluster = None try: self.vmContainer = {} ifids = netinfo.nics() + netinfo.bondings() ifrates = map(netinfo.speed, ifids) self._hostStats = sampling.HostStatsThread( cif=self, log=log, ifids=ifids, ifrates=ifrates) self._hostStats.start() self.lastRemoteAccess = 0 self._memLock = threading.Lock() self._enabled = True self._netConfigDirty = False self._prepareMOM() threading.Thread(target=self._recoverExistingVms, name='clientIFinit').start() self.channelListener.settimeout( config.getint('vars', 'guest_agent_timeout')) self.channelListener.start() self.threadLocal = threading.local() self.threadLocal.client = '' except: self.log.error('failed to init clientIF, ' 'shutting down storage dispatcher') if self.irs: self.irs.prepareForShutdown() if self.mom: self.mom.stop() raise self._prepareBindings() @classmethod def getInstance(cls, log=None): with cls._instanceLock: if cls._instance is None: if log is None: raise Exception("Logging facility is required to create " "the single clientIF instance") else: cls._instance = clientIF(log) return cls._instance def _getServerIP(self, addr=None): """Return the IP address we should listen on""" if addr: return addr try: addr = netinfo.getaddr(self.defaultBridge) except: pass return addr def _loadBindingXMLRPC(self): from BindingXMLRPC import BindingXMLRPC ip = self._getServerIP(config.get('addresses', 'management_ip')) xmlrpc_port = config.get('addresses', 'management_port') use_ssl = config.getboolean('vars', 'ssl') resp_timeout = config.getint('vars', 'vds_responsiveness_timeout') truststore_path = config.get('vars', 'trust_store_path') default_bridge = config.get("vars", "default_bridge") self.bindings['xmlrpc'] = BindingXMLRPC(self, self.log, ip, xmlrpc_port, use_ssl, resp_timeout, truststore_path, default_bridge) def _loadBindingJsonRpc(self): from BindingJsonRpc import BindingJsonRpc from Bridge import DynamicBridge ip = self._getServerIP(config.get('addresses', 'management_ip')) port = config.getint('addresses', 'json_port') conf = [('tcp', {"ip": ip, "port": port})] self.bindings['json'] = BindingJsonRpc(DynamicBridge(), conf) def _prepareBindings(self): self.bindings = {} if config.getboolean('vars', 'xmlrpc_enable'): try: self._loadBindingXMLRPC() except ImportError: self.log.error('Unable to load the xmlrpc server module. ' 'Please make sure it is installed.') if config.getboolean('vars', 'jsonrpc_enable'): try: self._loadBindingJsonRpc() except ImportError: self.log.warn('Unable to load the json rpc server module. 
    def _prepareMOM(self):
        momconf = config.get("mom", "conf")

        if isMomAvailable():
            try:
                self.mom = MomThread(momconf)
                return
            except:
                self.log.warn("MOM initialization failed and fall "
                              "back to KsmMonitor", exc_info=True)
        else:
            self.log.warn("MOM is not available, fallback to KsmMonitor")

        self.ksmMonitor = ksm.KsmMonitorThread(self)

    def _syncLibvirtNetworks(self):
        """
        function is mostly for upgrade from versions that did not have
        a libvirt network per vdsm network
        """
        # add libvirt networks
        nets = netinfo.networks()
        bridges = netinfo.bridges()
        configWriter = ifcfg.ConfigWriter()
        for bridge in bridges:
            if bridge not in nets:
                configWriter.createLibvirtNetwork(network=bridge,
                                                  bridged=True,
                                                  skipBackup=True)
        # remove bridged networks whose bridge no longer exists
        # TODO: this should probably go into vdsm-restore-net script
        for network in nets:
            if nets[network]['bridged'] and network not in bridges:
                configWriter.removeLibvirtNetwork(network, skipBackup=True)

    def prepareForShutdown(self):
        """
        Prepare server for shutdown.

        Should be called before taking server down.
        """
        if not self._shutdownSemaphore.acquire(blocking=False):
            self.log.debug('cannot run prepareForShutdown concurrently')
            return errCode['unavail']
        try:
            if not self._enabled:
                self.log.debug('cannot run prepareForShutdown twice')
                return errCode['unavail']

            for binding in self.bindings.values():
                binding.prepareForShutdown()
            self._enabled = False
            self.channelListener.stop()
            self._hostStats.stop()
            if self.mom:
                self.mom.stop()
            if self.irs:
                return self.irs.prepareForShutdown()
            else:
                return {'status': doneCode}
        finally:
            self._shutdownSemaphore.release()

    def serve(self):
        for binding in self.bindings.values():
            binding.start()
        while self._enabled:
            time.sleep(3)

    def _initIRS(self):
        self.irs = None
        if config.getboolean('irs', 'irs_enable'):
            try:
                self.irs = Dispatcher(HSM())
            except:
                self.log.error("Error initializing IRS", exc_info=True)

    def _getUUIDSpecPath(self, uuid):
        try:
            return blkid.getDeviceByUuid(uuid)
        except blkid.BlockIdException:
            self.log.info('Error finding path for device', exc_info=True)
            raise vm.VolumeError(uuid)

    def prepareVolumePath(self, drive, vmId=None):
        if type(drive) is dict:
            # PDIV drive format
            if drive['device'] == 'disk' and vm.isVdsmImage(drive):
                res = self.irs.prepareImage(
                    drive['domainID'], drive['poolID'],
                    drive['imageID'], drive['volumeID'])
                if res['status']['code']:
                    raise vm.VolumeError(drive)

                volPath = res['path']
                drive['volumeChain'] = res['chain']
                drive['volumeInfo'] = res['info']

            # GUID drive format
            elif "GUID" in drive:
                visible = self.irs.scanDevicesVisibility([drive["GUID"]])
                if visible[drive["GUID"]] is False:
                    self.log.error("GUID: %s is not visible", drive["GUID"])
                    raise vm.VolumeError(drive)

                volPath = os.path.join("/dev/mapper", drive["GUID"])
                res = self.irs.appropriateDevice(drive["GUID"], vmId)
                if res['status']['code']:
                    self.log.error("Change ownership on device %s failed",
                                   drive["GUID"])
                    raise vm.VolumeError(drive)

            # UUID drive format
            elif "UUID" in drive:
                volPath = self._getUUIDSpecPath(drive["UUID"])

            # leave path == '' for empty cdrom and floppy drives ...
            elif drive['device'] in ('cdrom', 'floppy') and \
                    'specParams' in drive and \
                    'path' in drive['specParams'] and \
                    drive['specParams']['path'] == '':
                volPath = ''

            # ... or load the drive from vmPayload:
            elif drive['device'] in ('cdrom', 'floppy') and \
                    'specParams' in drive and \
                    'vmPayload' in drive['specParams']:
                '''
                vmPayload is a key in specParams
                'vmPayload': {'file': {'filename': 'content'}}
                '''
                mkFsNames = {'cdrom': 'mkIsoFs', 'floppy': 'mkFloppyFs'}
                try:
                    mkFsFunction = getattr(supervdsm.getProxy(),
                                           mkFsNames[drive['device']])
                except AttributeError:
                    raise vm.VolumeError("Unsupported 'device': %s in "
                                         "drive: %s" % (drive['device'],
                                                        drive))
                else:
                    # the only reason for adding this variable is that
                    # you can not write this without breaking PEP8
                    file_name = drive['specParams']['vmPayload']['file']
                    volPath = mkFsFunction(vmId, file_name)

            elif "path" in drive:
                volPath = drive['path']

            else:
                raise vm.VolumeError(drive)

        # For BC sake: None as argument
        elif not drive:
            volPath = drive

        # For BC sake: path as a string.
        elif os.path.exists(drive):
            volPath = drive

        else:
            raise vm.VolumeError(drive)

        self.log.info("prepared volume path: %s", volPath)
        return volPath

    def teardownVolumePath(self, drive):
        res = {'status': doneCode}
        try:
            res = self.irs.teardownImage(drive['domainID'],
                                         drive['poolID'], drive['imageID'])
        except (KeyError, TypeError):
            # paths (strings) are not deactivated
            if not isinstance(drive, basestring):
                self.log.warning("Drive is not a vdsm image: %s",
                                 drive, exc_info=True)

        return res['status']['code']

    def createVm(self, vmParams):
        self.vmContainerLock.acquire()
        self.log.info("vmContainerLock acquired by vm %s",
                      vmParams['vmId'])
        try:
            if 'recover' not in vmParams:
                if vmParams['vmId'] in self.vmContainer:
                    self.log.warning('vm %s already exists' %
                                     vmParams['vmId'])
                    return errCode['exist']
            vm = Vm(self, vmParams)
            self.vmContainer[vmParams['vmId']] = vm
        finally:
            container_len = len(self.vmContainer)
            self.vmContainerLock.release()
        vm.run()
        self.log.debug("Total desktops after creation of %s is %d" %
                       (vmParams['vmId'], container_len))
        return {'status': doneCode, 'vmList': vm.status()}

    def _initializingLibvirt(self):
        self._syncLibvirtNetworks()
        mog = min(config.getint('vars', 'max_outgoing_migrations'),
                  caps.CpuTopology().cores())
        vm.MigrationSourceThread.setMaxOutgoingMigrations(mog)

    def _recoverExistingVms(self):
        # Starting up libvirt might take long when host under high load,
        # we prefer running this code in external thread to avoid blocking
        # API response.
        self._initializingLibvirt()
        try:
            vdsmVms = self._getVDSMVms()
            # Recover
            for v in vdsmVms:
                vmId = v.UUIDString()
                if not self._recoverVm(vmId):
                    # RH qemu proc without recovery
                    self.log.info('loose qemu process with id: '
                                  '%s found, killing it.', vmId)
                    try:
                        v.destroy()
                    except libvirt.libvirtError:
                        self.log.error('failed to kill loose qemu '
                                       'process with id: %s',
                                       vmId, exc_info=True)

            while (self._enabled and
                   'WaitForLaunch' in [v.lastStatus for v in
                                       self.vmContainer.values()]):
                time.sleep(1)

            self._cleanOldFiles()
            self._recovery = False

            # Now if we have VMs to restore we should wait pool connection
            # and then prepare all volumes.
            # Actually, we need it just to get the resources for future
            # volumes manipulations
            while self._enabled and self.vmContainer and \
                    not self.irs.getConnectedStoragePoolsList()['poollist']:
                time.sleep(5)

            for vmId, vmObj in self.vmContainer.items():
                # Let's recover as much VMs as possible
                try:
                    # Do not prepare volumes when system goes down
                    if self._enabled:
                        vmObj.preparePaths(
                            vmObj.getConfDevices()[vm.DISK_DEVICES])
                except:
                    self.log.error("Vm %s recovery failed",
                                   vmId, exc_info=True)
        except:
            self.log.error("Vm's recovery failed", exc_info=True)

    def isVDSMVm(self, vm):
        """
        Return True if vm seems as if it was created by vdsm.
        """
        try:
            vmdom = minidom.parseString(vm.XMLDesc(0))
            sysinfo = vmdom.getElementsByTagName("sysinfo")[0]
        except libvirt.libvirtError as e:
            if e.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN:
                self.log.error("domId: %s is dead", vm.UUIDString())
            else:
                raise
        except IndexError:
            pass  # no sysinfo in xml
        else:
            systype = sysinfo.getAttribute("type")
            if systype == "smbios":
                entries = sysinfo.getElementsByTagName("entry")
                for entry in entries:
                    if entry.getAttribute("name") == "product":
                        prod = entry.firstChild.data
                        if prod in (caps.OSName.RHEL, caps.OSName.OVIRT,
                                    caps.OSName.RHEVH, caps.OSName.FEDORA,
                                    caps.OSName.DEBIAN):
                            return True
        return False

    def _getVDSMVms(self):
        """
        Return a list of vdsm created VM's.
        """
        libvirtCon = libvirtconnection.get()
        domIds = libvirtCon.listDomainsID()
        vms = []
        for domId in domIds:
            try:
                vm = libvirtCon.lookupByID(domId)
            except libvirt.libvirtError as e:
                if e.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN:
                    self.log.error("domId: %s is dead", domId, exc_info=True)
                else:
                    self.log.error("Can't look for domId: %s, code: %s",
                                   domId, e.get_error_code(), exc_info=True)
                    raise
            else:
                vms.append(vm)
        return [vm for vm in vms if self.isVDSMVm(vm)]

    def _recoverVm(self, vmid):
        try:
            recoveryFile = constants.P_VDSM_RUN + vmid + ".recovery"
            params = pickle.load(file(recoveryFile))
            params['recover'] = True
            now = time.time()
            pt = float(params.pop('startTime', now))
            params['elapsedTimeOffset'] = now - pt
            self.log.debug("Trying to recover " + params['vmId'])
            if not self.createVm(params)['status']['code']:
                return recoveryFile
        except:
            self.log.debug("Error recovering VM", exc_info=True)
        return None

    def _cleanOldFiles(self):
        for f in os.listdir(constants.P_VDSM_RUN):
            try:
                vmId, fileType = f.split(".", 1)
                if fileType in ["guest.socket", "monitor.socket",
                                "pid", "stdio.dump", "recovery"]:
                    if vmId in self.vmContainer:
                        continue
                    if f == 'vdsmd.pid':
                        continue
                    if f == 'respawn.pid':
                        continue
                    if f == 'supervdsmd.pid':
                        continue
                    if f == 'svdsm.sock':
                        continue
                    if f == 'supervdsm_respawn.pid':
                        continue
                else:
                    continue
                self.log.debug("removing old file " + f)
                utils.rmFile(constants.P_VDSM_RUN + f)
            except:
                pass
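
    # Editor's note (illustrative, not part of the original module):
    # _recoverVm() expects <P_VDSM_RUN>/<vmId>.recovery to be a pickled dict
    # of the VM's creation parameters.  A minimal sketch of such a file and
    # of how it is consumed (any field other than 'vmId' and 'startTime' is
    # just an example here):
    #
    #     params = {'vmId': '9ffe28b6-6134-4b1e-8804-1185f49c436f',
    #               'vmName': 'example-vm',
    #               'startTime': 1380000000.0}
    #     with open(constants.P_VDSM_RUN + params['vmId'] + '.recovery',
    #               'w') as f:
    #         pickle.dump(params, f)
    #
    # On recovery the dict is loaded back, 'startTime' is popped and turned
    # into params['elapsedTimeOffset'] = time.time() - startTime, 'recover'
    # is set to True, and the result is handed to createVm().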