class clientIF(object):
    """
    The client interface of vdsm.
    Exposes vdsm verbs as xml-rpc functions.
    """
    # Singleton bookkeeping: the one live instance and the lock that
    # guards its lazy creation in getInstance().
    _instance = None
    _instanceLock = threading.Lock()

    def __init__(self, irs, log):
        """
        Initialize the (single) clientIF instance

        :param irs: a Dispatcher object to be used as this object's irs.
        :type irs: :class:`storage.dispatcher.Dispatcher`
        :param log: a log object to be used for this object's logging.
        :type log: :class:`logging.Logger`
        """
        self.vmContainerLock = threading.Lock()
        self._networkSemaphore = threading.Semaphore()
        self._shutdownSemaphore = threading.Semaphore()
        self.irs = irs
        if self.irs:
            # Ask storage to call us back on domain state changes so we
            # can resume VMs paused on EIO (see contEIOVms).
            self.irs.registerDomainStateChangeCallback(self.contEIOVms)
        self.log = log
        self._recovery = True
        self.channelListener = Listener(self.log)
        self._generationID = str(uuid.uuid4())
        self.mom = None
        self.bindings = {}
        if _glusterEnabled:
            self.gluster = gapi.GlusterApi(self, log)
        else:
            self.gluster = None
        try:
            self.vmContainer = {}
            self._hostStats = sampling.HostStatsThread(log=log)
            self._hostStats.start()
            self.lastRemoteAccess = 0
            self._enabled = True
            self._netConfigDirty = False
            self._prepareMOM()
            # VM recovery may take a long time (waits for libvirt and
            # storage pools); run it in the background so __init__ and
            # thus service startup return quickly.
            threading.Thread(target=self._recoverThread,
                             name='clientIFinit').start()
            self.channelListener.settimeout(
                config.getint('vars', 'guest_agent_timeout'))
            self.channelListener.start()
            self.threadLocal = threading.local()
            self.threadLocal.client = ''

            host = config.get('addresses', 'management_ip')
            port = config.getint('addresses', 'management_port')
            self._createAcceptor(host, port)
            self._prepareXMLRPCBinding(port)
            self._prepareJSONRPCBinding()
        except:
            # NOTE(review): bare except is deliberate here -- any init
            # failure must tear down storage/MOM before re-raising.
            self.log.error('failed to init clientIF, '
                           'shutting down storage dispatcher')
            if self.irs:
                self.irs.prepareForShutdown()
            if self.mom:
                self.mom.stop()
            raise

    @property
    def ready(self):
        # vdsm is usable only once storage is up (or absent) and the
        # background VM recovery has finished.
        return (self.irs is None or self.irs.ready) and not self._recovery

    def contEIOVms(self, sdUUID, isDomainStateValid):
        """Resume VMs that libvirt paused on I/O error against sdUUID."""
        # This method is called everytime the onDomainStateChange
        # event is emitted, this event is emitted even when a domain goes
        # INVALID; if this happens there is nothing to do.
        if not isDomainStateValid:
            return

        libvirtCon = libvirtconnection.get()
        libvirtVms = libvirtCon.listAllDomains(
            libvirt.VIR_CONNECT_LIST_DOMAINS_PAUSED)

        with self.vmContainerLock:
            self.log.info("vmContainerLock acquired")
            for libvirtVm in libvirtVms:
                state = libvirtVm.state(0)
                if state[1] == libvirt.VIR_DOMAIN_PAUSED_IOERROR:
                    vmId = libvirtVm.UUIDString()
                    vmObj = self.vmContainer[vmId]
                    # Only resume VMs that actually use this domain.
                    if sdUUID in vmObj.sdIds:
                        self.log.info("Cont vm %s in EIO", vmId)
                        vmObj.cont()

    @classmethod
    def getInstance(cls, irs=None, log=None):
        """Return the singleton clientIF, creating it on first call."""
        with cls._instanceLock:
            if cls._instance is None:
                if log is None:
                    raise Exception("Logging facility is required to create "
                                    "the single clientIF instance")
                else:
                    cls._instance = clientIF(irs, log)
            return cls._instance

    def _createAcceptor(self, host, port):
        # Single acceptor socket; per-protocol detectors are added by the
        # _prepare*Binding methods.
        sslctx = self._createSSLContext()
        self._acceptor = MultiProtocolAcceptor(host, port, sslctx)

    def _createSSLContext(self):
        """Build an SSLContext from the trust store, or None if ssl is off."""
        sslctx = None
        if config.getboolean('vars', 'ssl'):
            truststore_path = config.get('vars', 'trust_store_path')
            key_file = os.path.join(truststore_path, 'keys', 'vdsmkey.pem')
            cert_file = os.path.join(truststore_path, 'certs',
                                     'vdsmcert.pem')
            ca_cert = os.path.join(truststore_path, 'certs', 'cacert.pem')
            sslctx = SSLContext(cert_file, key_file, ca_cert)
        return sslctx

    def _prepareXMLRPCBinding(self, port):
        """Create the xml-rpc binding and register its protocol detector."""
        if config.getboolean('vars', 'xmlrpc_enable'):
            try:
                # Imported lazily: the module may legitimately be absent.
                from rpc.BindingXMLRPC import BindingXMLRPC
                from rpc.BindingXMLRPC import XmlDetector
            except ImportError:
                self.log.error('Unable to load the xmlrpc server module. '
                               'Please make sure it is installed.')
            else:
                xml_binding = BindingXMLRPC(self, self.log, port)
                self.bindings['xmlrpc'] = xml_binding
                xml_detector = XmlDetector(xml_binding)
                self._acceptor.add_detector(xml_detector)

    def _prepareJSONRPCBinding(self):
        """Create the json-rpc binding and register its stomp detector."""
        if config.getboolean('vars', 'jsonrpc_enable'):
            try:
                # Imported lazily: the module may legitimately be absent.
                from rpc import Bridge
                from rpc.BindingJsonRpc import BindingJsonRpc
                from yajsonrpc.stompReactor import StompDetector
            except ImportError:
                self.log.warn('Unable to load the json rpc server module. '
                              'Please make sure it is installed.')
            else:
                bridge = Bridge.DynamicBridge()
                json_binding = BindingJsonRpc(bridge)
                self.bindings['jsonrpc'] = json_binding
                stomp_detector = StompDetector(json_binding)
                self._acceptor.add_detector(stomp_detector)

    def _prepareMOM(self):
        # Start the MOM (memory overcommitment manager) thread using the
        # configured conf file.
        momconf = config.get("mom", "conf")
        self.mom = MomThread(momconf)

    def prepareForShutdown(self):
        """
        Prepare server for shutdown. Should be called
        before taking server down.
        """
        # Non-blocking acquire: concurrent shutdown attempts bail out
        # instead of waiting.
        if not self._shutdownSemaphore.acquire(blocking=False):
            self.log.debug('cannot run prepareForShutdown concurrently')
            return errCode['unavail']
        try:
            if not self._enabled:
                self.log.debug('cannot run prepareForShutdown twice')
                return errCode['unavail']

            self._acceptor.stop()
            for binding in self.bindings.values():
                binding.stop()

            self._enabled = False
            self.channelListener.stop()
            self._hostStats.stop()
            if self.mom:
                self.mom.stop()
            if self.irs:
                return self.irs.prepareForShutdown()
            else:
                return {'status': doneCode}
        finally:
            self._shutdownSemaphore.release()

    def start(self):
        """Start all rpc bindings and the acceptor's detector thread."""
        for binding in self.bindings.values():
            binding.start()
        self.thread = threading.Thread(target=self._acceptor.serve_forever,
                                       name='Detector thread')
        # Daemon thread: must not keep the process alive on shutdown.
        self.thread.setDaemon(True)
        self.thread.start()

    def _getUUIDSpecPath(self, uuid):
        # Resolve a filesystem UUID to its device path via blkid.
        try:
            return blkid.getDeviceByUuid(uuid)
        except blkid.BlockIdException:
            self.log.info('Error finding path for device', exc_info=True)
            raise vm.VolumeError(uuid)

    def prepareVolumePath(self, drive, vmId=None):
        """
        Activate the storage behind *drive* and return a usable path.

        :param drive: a drive specification dict (PDIV image, GUID LUN,
                      UUID, cdrom/floppy or raw path), a plain path
                      string, or None (both accepted for backward
                      compatibility).
        :param vmId: VM UUID, used for payload/LUN preparation; may be
                     None.
        :raises vm.VolumeError: when the drive cannot be prepared.
        """
        if type(drive) is dict:
            device = drive['device']
            # PDIV drive format
            if device == 'disk' and isVdsmImage(drive):
                res = self.irs.prepareImage(drive['domainID'],
                                            drive['poolID'],
                                            drive['imageID'],
                                            drive['volumeID'])
                if res['status']['code']:
                    raise vm.VolumeError(drive)

                volPath = res['path']
                # The order of imgVolumesInfo is not guaranteed
                drive['volumeChain'] = res['imgVolumesInfo']
                drive['volumeInfo'] = res['info']

            # GUID drive format
            elif "GUID" in drive:
                res = self.irs.getDevicesVisibility([drive["GUID"]])
                if not res["visible"][drive["GUID"]]:
                    raise vm.VolumeError(drive)

                res = self.irs.appropriateDevice(drive["GUID"], vmId)
                if res['status']['code']:
                    raise vm.VolumeError(drive)

                # Update size for LUN volume
                drive["truesize"] = res['truesize']
                drive["apparentsize"] = res['apparentsize']

                volPath = res['path']

            # UUID drive format
            elif "UUID" in drive:
                volPath = self._getUUIDSpecPath(drive["UUID"])

            # cdrom and floppy drives
            elif (device in ('cdrom', 'floppy') and
                  'specParams' in drive):
                params = drive['specParams']
                if 'vmPayload' in params:
                    volPath = self._prepareVolumePathFromPayload(
                        vmId, device, params['vmPayload'])
                # next line can be removed in future, when < 3.3 engine
                # is not supported
                elif (params.get('path', '') == '' and
                      drive.get('path', '') == ''):
                    volPath = ''
                else:
                    volPath = drive.get('path', '')

            elif "path" in drive:
                volPath = drive['path']

            else:
                raise vm.VolumeError(drive)

        # For BC sake: None as argument
        elif not drive:
            volPath = drive

        # For BC sake: path as a string.
        elif os.path.exists(drive):
            volPath = drive

        else:
            raise vm.VolumeError(drive)

        self.log.info("prepared volume path: %s", volPath)
        return volPath

    def _prepareVolumePathFromPayload(self, vmId, device, payload):
        """
        Build a payload image via supervdsm and return its path.

        param vmId: VM UUID or None
        param device: either 'floppy' or 'cdrom'
        param payload: a dict formed like this:
            {'volId': 'volume id',   # volId is optional
             'file': {'filename': 'content', ...}}
        """
        funcs = {'cdrom': 'mkIsoFs', 'floppy': 'mkFloppyFs'}
        if device not in funcs:
            raise vm.VolumeError("Unsupported 'device': %s" % device)
        # Dispatch to supervdsm's mkIsoFs/mkFloppyFs by name.
        func = getattr(supervdsm.getProxy(), funcs[device])
        return func(vmId, payload['file'], payload.get('volId'))

    def teardownVolumePath(self, drive):
        """Deactivate a previously prepared vdsm image; return status code."""
        res = {'status': doneCode}
        try:
            if isVdsmImage(drive):
                res = self.irs.teardownImage(drive['domainID'],
                                             drive['poolID'],
                                             drive['imageID'])
        except TypeError:
            # paths (strings) are not deactivated
            if not isinstance(drive, basestring):
                self.log.warning("Drive is not a vdsm image: %s",
                                 drive, exc_info=True)

        return res['status']['code']

    def getDiskAlignment(self, drive):
        """
        Returns the alignment of the disk partitions

        param drive:
            is either {"poolID": , "domainID": , "imageID": , "volumeID": }
            or {"GUID": }

        Return type: a dictionary with partition names as keys and
        True for aligned partitions and False for unaligned as values
        """
        aligning = {}
        volPath = self.prepareVolumePath(drive)
        try:
            out = alignmentScan.scanImage(volPath)
            for line in out:
                aligning[line.partitionName] = line.alignmentScanResult
        finally:
            # Always release the volume, even if the scan failed.
            self.teardownVolumePath(drive)

        return {'status': doneCode, 'alignment': aligning}

    def createVm(self, vmParams, vmRecover=False):
        """
        Create a Vm object, register it in vmContainer and start it.

        :param vmParams: VM definition dict; must contain 'vmId'.
        :param vmRecover: when True, skip the duplicate-vmId check
                          (used when re-creating VMs from recovery files).
        """
        with self.vmContainerLock:
            self.log.info("vmContainerLock acquired by vm %s",
                          vmParams['vmId'])
            try:
                if not vmRecover:
                    if vmParams['vmId'] in self.vmContainer:
                        self.log.warning('vm %s already exists' %
                                         vmParams['vmId'])
                        return errCode['exist']
                vm = Vm(self, vmParams, vmRecover)
                self.vmContainer[vmParams['vmId']] = vm
            finally:
                # Snapshot the container size while still under the lock.
                container_len = len(self.vmContainer)
        # vm.run() is intentionally outside the lock.
        vm.run()
        self.log.debug("Total desktops after creation of %s is %d" %
                       (vmParams['vmId'], container_len))
        return {'status': doneCode, 'vmList': vm.status()}

    @utils.traceback()
    def _recoverThread(self):
        # Trying to run recover process until it works. During that time vdsm
        # stays in recovery mode (_recover=True), means all api requests
        # returns with "vdsm is in initializing process" message.
        utils.retry(self._recoverExistingVms, sleep=5)

    def _recoverExistingVms(self):
        """Re-attach VMs that survived a vdsm restart; clean up the rest."""
        try:
            # Starting up libvirt might take long when host under high load,
            # we prefer running this code in external thread to avoid blocking
            # API response.
            mog = min(config.getint('vars', 'max_outgoing_migrations'),
                      caps.CpuTopology().cores())
            migration.SourceThread.setMaxOutgoingMigrations(mog)

            # Recover
            for v in getVDSMDomains():
                vmId = v.UUIDString()
                if not self._recoverVm(vmId):
                    # RH qemu proc without recovery
                    self.log.info('loose qemu process with id: '
                                  '%s found, killing it.', vmId)
                    try:
                        v.destroy()
                    except libvirt.libvirtError:
                        self.log.error('failed to kill loose qemu '
                                       'process with id: %s', vmId,
                                       exc_info=True)

            # we do this to safely handle VMs which disappeared
            # from the host while VDSM was down/restarting
            recVms = self._getVDSMVmsFromRecovery()
            if recVms:
                self.log.warning('Found %i VMs from recovery files not'
                                 ' reported by libvirt.'
                                 ' This should not happen!'
                                 ' Will try to recover them.', len(recVms))

            for vmId in recVms:
                if not self._recoverVm(vmId):
                    self.log.warning('VM %s failed to recover from recovery'
                                     ' file, reported as Down', vmId)

            # Wait until no recovered VM is still waiting to launch (or
            # until shutdown is requested).
            while (self._enabled and
                   vmstatus.WAIT_FOR_LAUNCH in [v.lastStatus for v in
                                                self.vmContainer.values()]):
                time.sleep(1)
            self._cleanOldFiles()
            self._recovery = False

            # Now if we have VMs to restore we should wait pool connection
            # and then prepare all volumes.
            # Actually, we need it just to get the resources for future
            # volumes manipulations
            while self._enabled and self.vmContainer and \
                    not self.irs.getConnectedStoragePoolsList()['poollist']:
                time.sleep(5)

            for vmId, vmObj in self.vmContainer.items():
                # Let's recover as much VMs as possible
                try:
                    # Do not prepare volumes when system goes down
                    if self._enabled:
                        vmObj.preparePaths(
                            vmObj.buildConfDevices()[vm.DISK_DEVICES])
                except:
                    # Per-VM best effort: one failure must not stop the rest.
                    self.log.error("Vm %s recovery failed",
                                   vmId, exc_info=True)
        except:
            self.log.error("Vm's recovery failed", exc_info=True)
            raise

    def _getVDSMVmsFromRecovery(self):
        # List vmIds that have a .recovery file but are not (yet) in
        # vmContainer.
        vms = []
        for f in os.listdir(constants.P_VDSM_RUN):
            vmId, fileType = os.path.splitext(f)
            if fileType == ".recovery":
                if vmId not in self.vmContainer:
                    vms.append(vmId)
        return vms

    def _recoverVm(self, vmid):
        """
        Recreate one VM from its pickled recovery file.

        Returns the recovery file path on success, None on failure.
        """
        try:
            recoveryFile = constants.P_VDSM_RUN + vmid + ".recovery"
            # NOTE(review): pickle.load on a local state file written by
            # vdsm itself; not untrusted input, but worth keeping in mind.
            params = pickle.load(file(recoveryFile))
            now = time.time()
            pt = float(params.pop('startTime', now))
            params['elapsedTimeOffset'] = now - pt
            self.log.debug("Trying to recover " + params['vmId'])
            if not self.createVm(params, vmRecover=True)['status']['code']:
                return recoveryFile
        except:
            self.log.debug("Error recovering VM", exc_info=True)
        return None

    def _cleanOldFiles(self):
        """Remove stale per-VM runtime files left in P_VDSM_RUN."""
        for f in os.listdir(constants.P_VDSM_RUN):
            try:
                vmId, fileType = f.split(".", 1)
                if fileType in ["guest.socket", "monitor.socket", "pid",
                                "stdio.dump", "recovery"]:
                    # Keep files of VMs we actually recovered and the
                    # daemons' own pid files.
                    if vmId in self.vmContainer:
                        continue
                    if f == 'vdsmd.pid':
                        continue
                    if f == 'respawn.pid':
                        continue
                    if f == 'supervdsmd.pid':
                        continue
                    if f == 'supervdsm_respawn.pid':
                        continue
                else:
                    continue
                self.log.debug("removing old file " + f)
                utils.rmFile(constants.P_VDSM_RUN + f)
            except:
                # Best effort cleanup; ignore files we cannot handle.
                pass

    def dispatchLibvirtEvents(self, conn, dom, *args):
        """
        Route a libvirt domain event to the owning Vm object.

        libvirt passes event-specific arguments; by our registration
        convention the last element of *args is the event id.
        """
        try:
            eventid = args[-1]
            vmid = dom.UUIDString()
            v = self.vmContainer.get(vmid)

            if not v:
                self.log.debug('unknown vm %s eventid %s args %s',
                               vmid, eventid, args)
                return

            if eventid == libvirt.VIR_DOMAIN_EVENT_ID_LIFECYCLE:
                event, detail = args[:-1]
                v._onLibvirtLifecycleEvent(event, detail, None)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_REBOOT:
                v.onReboot()
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_RTC_CHANGE:
                utcoffset, = args[:-1]
                v._rtcUpdate(utcoffset)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_IO_ERROR_REASON:
                srcPath, devAlias, action, reason = args[:-1]
                v._onIOError(devAlias, reason, action)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_GRAPHICS:
                phase, localAddr, remoteAddr, authScheme, subject = args[:-1]
                v.log.debug('graphics event phase '
                            '%s localAddr %s remoteAddr %s'
                            'authScheme %s subject %s',
                            phase, localAddr, remoteAddr, authScheme, subject)
                if phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE:
                    v.onConnect(remoteAddr['node'])
                elif phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT:
                    v.onDisconnect()
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_WATCHDOG:
                action, = args[:-1]
                v._onWatchdogEvent(action)
            else:
                v.log.warning('unknown eventid %s args %s', eventid, args)
        except:
            # Never let an event callback exception propagate into libvirt.
            self.log.error("Error running VM callback", exc_info=True)
class clientIF(object):
    """
    The client interface of vdsm.
    Exposes vdsm verbs as xml-rpc functions.
    """
    # Singleton state shared by all callers of getInstance().
    _instance = None
    _instanceLock = threading.Lock()

    def __init__(self, irs, log):
        """
        Initialize the (single) clientIF instance

        :param irs: a Dispatcher object to be used as this object's irs.
        :type irs: :class:`storage.dispatcher.Dispatcher`
        :param log: a log object to be used for this object's logging.
        :type log: :class:`logging.Logger`
        """
        self.vmContainerLock = threading.Lock()
        self._networkSemaphore = threading.Semaphore()
        self._shutdownSemaphore = threading.Semaphore()
        self.irs = irs
        if self.irs:
            # Storage will call contEIOVms on every domain state change so
            # EIO-paused VMs can be resumed.
            self.irs.registerDomainStateChangeCallback(self.contEIOVms)
        self.log = log
        self._recovery = True
        self.channelListener = Listener(self.log)
        self._generationID = str(uuid.uuid4())
        self.mom = None
        self.bindings = {}
        if _glusterEnabled:
            self.gluster = gapi.GlusterApi(self, log)
        else:
            self.gluster = None
        try:
            self.vmContainer = {}
            self._hostStats = sampling.HostStatsThread(log=log)
            self._hostStats.start()
            self.lastRemoteAccess = 0
            self._enabled = True
            self._netConfigDirty = False
            self._prepareMOM()
            # Recovery is slow (libvirt + storage pools); do it in a
            # background thread so service startup is not blocked.
            threading.Thread(target=self._recoverThread,
                             name='clientIFinit').start()
            self.channelListener.settimeout(
                config.getint('vars', 'guest_agent_timeout'))
            self.channelListener.start()
            self.threadLocal = threading.local()
            self.threadLocal.client = ''

            host = config.get('addresses', 'management_ip')
            port = config.getint('addresses', 'management_port')
            self._createAcceptor(host, port)
            self._prepareXMLRPCBinding(port)
            self._prepareJSONRPCBinding()
        except:
            # NOTE(review): intentional bare except -- undo partial init
            # (storage, MOM) before re-raising whatever went wrong.
            self.log.error('failed to init clientIF, '
                           'shutting down storage dispatcher')
            if self.irs:
                self.irs.prepareForShutdown()
            if self.mom:
                self.mom.stop()
            raise

    @property
    def ready(self):
        # Ready means: storage up (or not configured) and recovery done.
        return (self.irs is None or self.irs.ready) and not self._recovery

    def contEIOVms(self, sdUUID, isDomainStateValid):
        """Resume VMs paused with an I/O error on storage domain sdUUID."""
        # This method is called everytime the onDomainStateChange
        # event is emitted, this event is emitted even when a domain goes
        # INVALID; if this happens there is nothing to do.
        if not isDomainStateValid:
            return

        libvirtCon = libvirtconnection.get()
        libvirtVms = libvirtCon.listAllDomains(
            libvirt.VIR_CONNECT_LIST_DOMAINS_PAUSED)

        with self.vmContainerLock:
            self.log.info("vmContainerLock acquired")
            for libvirtVm in libvirtVms:
                state = libvirtVm.state(0)
                if state[1] == libvirt.VIR_DOMAIN_PAUSED_IOERROR:
                    vmId = libvirtVm.UUIDString()
                    vmObj = self.vmContainer[vmId]
                    # Resume only VMs whose disks live on this domain.
                    if sdUUID in vmObj.sdIds:
                        self.log.info("Cont vm %s in EIO", vmId)
                        vmObj.cont()

    @classmethod
    def getInstance(cls, irs=None, log=None):
        """Lazily create and return the singleton clientIF instance."""
        with cls._instanceLock:
            if cls._instance is None:
                if log is None:
                    raise Exception("Logging facility is required to create "
                                    "the single clientIF instance")
                else:
                    cls._instance = clientIF(irs, log)
            return cls._instance

    def _createAcceptor(self, host, port):
        # One listening socket; protocol detectors are registered later by
        # the binding preparation methods.
        sslctx = self._createSSLContext()
        self._acceptor = MultiProtocolAcceptor(host, port, sslctx)

    def _createSSLContext(self):
        """Return an SSLContext built from the trust store, or None."""
        sslctx = None
        if config.getboolean('vars', 'ssl'):
            truststore_path = config.get('vars', 'trust_store_path')
            key_file = os.path.join(truststore_path, 'keys', 'vdsmkey.pem')
            cert_file = os.path.join(truststore_path, 'certs',
                                     'vdsmcert.pem')
            ca_cert = os.path.join(truststore_path, 'certs', 'cacert.pem')
            sslctx = SSLContext(cert_file, key_file, ca_cert)
        return sslctx

    def _prepareXMLRPCBinding(self, port):
        """Set up the xml-rpc binding if enabled in configuration."""
        if config.getboolean('vars', 'xmlrpc_enable'):
            try:
                # Lazy import: the xmlrpc support package is optional.
                from rpc.BindingXMLRPC import BindingXMLRPC
                from rpc.BindingXMLRPC import XmlDetector
            except ImportError:
                self.log.error('Unable to load the xmlrpc server module. '
                               'Please make sure it is installed.')
            else:
                xml_binding = BindingXMLRPC(self, self.log, port)
                self.bindings['xmlrpc'] = xml_binding
                xml_detector = XmlDetector(xml_binding)
                self._acceptor.add_detector(xml_detector)

    def _prepareJSONRPCBinding(self):
        """Set up the json-rpc binding if enabled in configuration."""
        if config.getboolean('vars', 'jsonrpc_enable'):
            try:
                # Lazy import: the json-rpc support packages are optional.
                from rpc import Bridge
                from rpc.BindingJsonRpc import BindingJsonRpc
                from yajsonrpc.stompReactor import StompDetector
            except ImportError:
                self.log.warn('Unable to load the json rpc server module. '
                              'Please make sure it is installed.')
            else:
                bridge = Bridge.DynamicBridge()
                json_binding = BindingJsonRpc(bridge)
                self.bindings['jsonrpc'] = json_binding
                stomp_detector = StompDetector(json_binding)
                self._acceptor.add_detector(stomp_detector)

    def _prepareMOM(self):
        """Start MOM if available, otherwise fall back to KsmMonitor."""
        momconf = config.get("mom", "conf")

        if isMomAvailable():
            try:
                self.mom = MomThread(momconf)
                return
            except:
                self.log.warn("MOM initialization failed and fall "
                              "back to KsmMonitor", exc_info=True)
        else:
            self.log.warn("MOM is not available, fallback to KsmMonitor")

        # Reached only when MOM is absent or failed to initialize.
        self.ksmMonitor = ksm.KsmMonitorThread(self)

    def prepareForShutdown(self):
        """
        Prepare server for shutdown. Should be called
        before taking server down.
        """
        # Non-blocking acquire so a concurrent shutdown attempt returns
        # immediately instead of queueing.
        if not self._shutdownSemaphore.acquire(blocking=False):
            self.log.debug('cannot run prepareForShutdown concurrently')
            return errCode['unavail']
        try:
            if not self._enabled:
                self.log.debug('cannot run prepareForShutdown twice')
                return errCode['unavail']

            self._acceptor.stop()
            for binding in self.bindings.values():
                binding.stop()

            self._enabled = False
            self.channelListener.stop()
            self._hostStats.stop()
            if self.mom:
                self.mom.stop()
            if self.irs:
                return self.irs.prepareForShutdown()
            else:
                return {'status': doneCode}
        finally:
            self._shutdownSemaphore.release()

    def start(self):
        """Start the rpc bindings and the acceptor loop thread."""
        for binding in self.bindings.values():
            binding.start()
        self.thread = threading.Thread(target=self._acceptor.serve_forever,
                                       name='Detector thread')
        # Daemonized so it cannot block process exit.
        self.thread.setDaemon(True)
        self.thread.start()

    def _getUUIDSpecPath(self, uuid):
        # Map a filesystem UUID to the backing device path.
        try:
            return blkid.getDeviceByUuid(uuid)
        except blkid.BlockIdException:
            self.log.info('Error finding path for device', exc_info=True)
            raise vm.VolumeError(uuid)

    def prepareVolumePath(self, drive, vmId=None):
        """
        Activate the storage behind *drive* and return a usable path.

        Accepts a drive spec dict (PDIV image / GUID LUN / UUID /
        cdrom-floppy / plain path), a path string, or None — the latter
        two for backward compatibility.

        :raises vm.VolumeError: if the drive cannot be prepared.
        """
        if type(drive) is dict:
            device = drive['device']
            # PDIV drive format
            if device == 'disk' and isVdsmImage(drive):
                res = self.irs.prepareImage(
                    drive['domainID'], drive['poolID'],
                    drive['imageID'], drive['volumeID'])

                if res['status']['code']:
                    raise vm.VolumeError(drive)

                volPath = res['path']
                # The order of imgVolumesInfo is not guaranteed
                drive['volumeChain'] = res['imgVolumesInfo']
                drive['volumeInfo'] = res['info']

            # GUID drive format
            elif "GUID" in drive:
                res = self.irs.getDevicesVisibility([drive["GUID"]])
                if not res["visible"][drive["GUID"]]:
                    raise vm.VolumeError(drive)

                res = self.irs.appropriateDevice(drive["GUID"], vmId)
                if res['status']['code']:
                    raise vm.VolumeError(drive)

                # Update size for LUN volume
                drive["truesize"] = res['truesize']
                drive["apparentsize"] = res['apparentsize']

                volPath = res['path']

            # UUID drive format
            elif "UUID" in drive:
                volPath = self._getUUIDSpecPath(drive["UUID"])

            # cdrom and floppy drives
            elif (device in ('cdrom', 'floppy') and
                  'specParams' in drive):
                params = drive['specParams']
                if 'vmPayload' in params:
                    volPath = self._prepareVolumePathFromPayload(
                        vmId, device, params['vmPayload'])
                # next line can be removed in future, when < 3.3 engine
                # is not supported
                elif (params.get('path', '') == '' and
                      drive.get('path', '') == ''):
                    volPath = ''
                else:
                    volPath = drive.get('path', '')

            elif "path" in drive:
                volPath = drive['path']

            else:
                raise vm.VolumeError(drive)

        # For BC sake: None as argument
        elif not drive:
            volPath = drive

        # For BC sake: path as a string.
        elif os.path.exists(drive):
            volPath = drive

        else:
            raise vm.VolumeError(drive)

        self.log.info("prepared volume path: %s", volPath)
        return volPath

    def _prepareVolumePathFromPayload(self, vmId, device, payload):
        """
        Build a payload image through supervdsm and return its path.

        param vmId: VM UUID or None
        param device: either 'floppy' or 'cdrom'
        param payload: a dict formed like this:
            {'volId': 'volume id',   # volId is optional
             'file': {'filename': 'content', ...}}
        """
        funcs = {'cdrom': 'mkIsoFs', 'floppy': 'mkFloppyFs'}
        if device not in funcs:
            raise vm.VolumeError("Unsupported 'device': %s" % device)
        # Look up the matching supervdsm builder by name.
        func = getattr(supervdsm.getProxy(), funcs[device])
        return func(vmId, payload['file'], payload.get('volId'))

    def teardownVolumePath(self, drive):
        """Deactivate a prepared vdsm image drive; return a status code."""
        res = {'status': doneCode}
        try:
            if isVdsmImage(drive):
                res = self.irs.teardownImage(drive['domainID'],
                                             drive['poolID'],
                                             drive['imageID'])
        except TypeError:
            # paths (strings) are not deactivated
            if not isinstance(drive, basestring):
                self.log.warning("Drive is not a vdsm image: %s",
                                 drive, exc_info=True)

        return res['status']['code']

    def getDiskAlignment(self, drive):
        """
        Returns the alignment of the disk partitions

        param drive:
            is either {"poolID": , "domainID": , "imageID": , "volumeID": }
            or {"GUID": }

        Return type: a dictionary with partition names as keys and
        True for aligned partitions and False for unaligned as values
        """
        aligning = {}
        volPath = self.prepareVolumePath(drive)
        try:
            out = alignmentScan.scanImage(volPath)
            for line in out:
                aligning[line.partitionName] = line.alignmentScanResult
        finally:
            # Release the volume regardless of scan outcome.
            self.teardownVolumePath(drive)

        return {'status': doneCode, 'alignment': aligning}

    def createVm(self, vmParams, vmRecover=False):
        """
        Register and start a new Vm built from vmParams.

        :param vmParams: VM definition dict; must contain 'vmId'.
        :param vmRecover: True when re-creating a VM from a recovery
                          file; skips the already-exists check.
        """
        with self.vmContainerLock:
            self.log.info("vmContainerLock acquired by vm %s",
                          vmParams['vmId'])
            try:
                if not vmRecover:
                    if vmParams['vmId'] in self.vmContainer:
                        self.log.warning('vm %s already exists' %
                                         vmParams['vmId'])
                        return errCode['exist']
                vm = Vm(self, vmParams, vmRecover)
                self.vmContainer[vmParams['vmId']] = vm
            finally:
                # Capture the count while still holding the lock.
                container_len = len(self.vmContainer)
        # Start the VM outside the container lock.
        vm.run()
        self.log.debug("Total desktops after creation of %s is %d" %
                       (vmParams['vmId'], container_len))
        return {'status': doneCode, 'vmList': vm.status()}

    @utils.traceback()
    def _recoverThread(self):
        # Trying to run recover process until it works. During that time vdsm
        # stays in recovery mode (_recover=True), means all api requests
        # returns with "vdsm is in initializing process" message.
        utils.retry(self._recoverExistingVms, sleep=5)

    def _recoverExistingVms(self):
        """Re-attach VMs that survived a vdsm restart; clean up leftovers."""
        try:
            # Starting up libvirt might take long when host under high load,
            # we prefer running this code in external thread to avoid blocking
            # API response.
            mog = min(config.getint('vars', 'max_outgoing_migrations'),
                      caps.CpuTopology().cores())
            migration.SourceThread.setMaxOutgoingMigrations(mog)

            vdsmVms = self._getVDSMVms()
            # Recover
            for v in vdsmVms:
                vmId = v.UUIDString()
                if not self._recoverVm(vmId):
                    # RH qemu proc without recovery
                    self.log.info('loose qemu process with id: '
                                  '%s found, killing it.', vmId)
                    try:
                        v.destroy()
                    except libvirt.libvirtError:
                        self.log.error('failed to kill loose qemu '
                                       'process with id: %s', vmId,
                                       exc_info=True)

            # we do this to safely handle VMs which disappeared
            # from the host while VDSM was down/restarting
            recVms = self._getVDSMVmsFromRecovery()
            if recVms:
                self.log.warning('Found %i VMs from recovery files not'
                                 ' reported by libvirt.'
                                 ' This should not happen!'
                                 ' Will try to recover them.', len(recVms))

            for vmId in recVms:
                if not self._recoverVm(vmId):
                    self.log.warning('VM %s failed to recover from recovery'
                                     ' file, reported as Down', vmId)

            # Wait until no recovered VM is still waiting to launch (or
            # until shutdown is requested).
            while (self._enabled and
                   vmstatus.WAIT_FOR_LAUNCH in [v.lastStatus for v in
                                                self.vmContainer.values()]):
                time.sleep(1)
            self._cleanOldFiles()
            self._recovery = False

            # Now if we have VMs to restore we should wait pool connection
            # and then prepare all volumes.
            # Actually, we need it just to get the resources for future
            # volumes manipulations
            while self._enabled and self.vmContainer and \
                    not self.irs.getConnectedStoragePoolsList()['poollist']:
                time.sleep(5)

            for vmId, vmObj in self.vmContainer.items():
                # Let's recover as much VMs as possible
                try:
                    # Do not prepare volumes when system goes down
                    if self._enabled:
                        vmObj.preparePaths(
                            vmObj.buildConfDevices()[vm.DISK_DEVICES])
                except:
                    # Best effort per VM; keep recovering the others.
                    self.log.error("Vm %s recovery failed",
                                   vmId, exc_info=True)
        except:
            self.log.error("Vm's recovery failed", exc_info=True)
            raise

    def isVDSMVm(self, vm):
        """
        Return True if vm seems as if it was created by vdsm.
        """
        try:
            vmdom = minidom.parseString(vm.XMLDesc(0))
            sysinfo = vmdom.getElementsByTagName("sysinfo")[0]
        except libvirt.libvirtError as e:
            if e.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN:
                # Domain vanished between listing and inspection.
                self.log.error("domId: %s is dead", vm.UUIDString())
            else:
                raise
        except IndexError:
            pass  # no sysinfo in xml
        else:
            # vdsm stamps its VMs with an smbios 'product' entry naming
            # one of the known host OS products.
            systype = sysinfo.getAttribute("type")
            if systype == "smbios":
                entries = sysinfo.getElementsByTagName("entry")
                for entry in entries:
                    if entry.getAttribute("name") == "product":
                        prod = entry.firstChild.data
                        if prod in (caps.OSName.RHEL, caps.OSName.OVIRT,
                                    caps.OSName.RHEVH, caps.OSName.FEDORA,
                                    caps.OSName.DEBIAN):
                            return True
        return False

    def _getVDSMVms(self):
        """
        Return a list of vdsm created VM's.
        """
        libvirtCon = libvirtconnection.get()
        domIds = libvirtCon.listDomainsID()
        vms = []
        for domId in domIds:
            try:
                vm = libvirtCon.lookupByID(domId)
            except libvirt.libvirtError as e:
                if e.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN:
                    # Domain disappeared after listing; skip it.
                    self.log.error("domId: %s is dead", domId, exc_info=True)
                else:
                    self.log.error("Can't look for domId: %s, code: %s",
                                   domId, e.get_error_code(), exc_info=True)
                    raise
            else:
                if self.isVDSMVm(vm):
                    vms.append(vm)
        return vms

    def _getVDSMVmsFromRecovery(self):
        # Collect vmIds that left a .recovery file but are absent from
        # vmContainer.
        vms = []
        for f in os.listdir(constants.P_VDSM_RUN):
            vmId, fileType = os.path.splitext(f)
            if fileType == ".recovery":
                if vmId not in self.vmContainer:
                    vms.append(vmId)
        return vms

    def _recoverVm(self, vmid):
        """
        Recreate one VM from its pickled recovery file.

        Returns the recovery file path on success, None on failure.
        """
        try:
            recoveryFile = constants.P_VDSM_RUN + vmid + ".recovery"
            # NOTE(review): pickle.load on a state file vdsm wrote itself;
            # not untrusted input, but still worth noting.
            params = pickle.load(file(recoveryFile))
            now = time.time()
            pt = float(params.pop('startTime', now))
            params['elapsedTimeOffset'] = now - pt
            self.log.debug("Trying to recover " + params['vmId'])
            if not self.createVm(params, vmRecover=True)['status']['code']:
                return recoveryFile
        except:
            self.log.debug("Error recovering VM", exc_info=True)
        return None

    def _cleanOldFiles(self):
        """Remove stale per-VM runtime files from P_VDSM_RUN."""
        for f in os.listdir(constants.P_VDSM_RUN):
            try:
                vmId, fileType = f.split(".", 1)
                if fileType in ["guest.socket", "monitor.socket", "pid",
                                "stdio.dump", "recovery"]:
                    # Preserve files of live VMs and the daemons' own
                    # pid files.
                    if vmId in self.vmContainer:
                        continue
                    if f == 'vdsmd.pid':
                        continue
                    if f == 'respawn.pid':
                        continue
                    if f == 'supervdsmd.pid':
                        continue
                    if f == 'supervdsm_respawn.pid':
                        continue
                else:
                    continue
                self.log.debug("removing old file " + f)
                utils.rmFile(constants.P_VDSM_RUN + f)
            except:
                # Best-effort cleanup; skip anything problematic.
                pass

    def dispatchLibvirtEvents(self, conn, dom, *args):
        """
        Route a libvirt domain event to the owning Vm object.

        By our registration convention, the last element of *args is
        the event id; the preceding elements are event-specific.
        """
        try:
            eventid = args[-1]
            vmid = dom.UUIDString()
            v = self.vmContainer.get(vmid)

            if not v:
                self.log.debug('unknown vm %s eventid %s args %s',
                               vmid, eventid, args)
                return

            if eventid == libvirt.VIR_DOMAIN_EVENT_ID_LIFECYCLE:
                event, detail = args[:-1]
                v._onLibvirtLifecycleEvent(event, detail, None)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_REBOOT:
                v.onReboot()
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_RTC_CHANGE:
                utcoffset, = args[:-1]
                v._rtcUpdate(utcoffset)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_IO_ERROR_REASON:
                srcPath, devAlias, action, reason = args[:-1]
                v._onIOError(devAlias, reason, action)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_GRAPHICS:
                phase, localAddr, remoteAddr, authScheme, subject = args[:-1]
                v.log.debug('graphics event phase '
                            '%s localAddr %s remoteAddr %s'
                            'authScheme %s subject %s',
                            phase, localAddr, remoteAddr, authScheme, subject)
                if phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE:
                    v.onConnect(remoteAddr['node'])
                elif phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT:
                    v.onDisconnect()
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_WATCHDOG:
                action, = args[:-1]
                v._onWatchdogEvent(action)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_BLOCK_JOB:
                path, jobType, status = args[:-1]
                v._onBlockJobEvent(path, jobType, status)
            else:
                v.log.warning('unknown eventid %s args %s', eventid, args)
        except:
            # Never let an exception escape back into libvirt's event loop.
            self.log.error("Error running VM callback", exc_info=True)
class clientIF(object):
    """
    The client interface of vdsm. Exposes vdsm verbs as json-rpc or
    xml-rpc functions.
    """
    # Singleton instance and the lock guarding its lazy creation.
    _instance = None
    _instanceLock = threading.Lock()

    def __init__(self, irs, log, scheduler):
        """
        Initialize the (single) clientIF instance

        :param irs: a Dispatcher object to be used as this object's irs.
        :type irs: :class:`storage.dispatcher.Dispatcher`
        :param log: a log object to be used for this object's logging.
        :type log: :class:`logging.Logger`
        :param scheduler: scheduler handed to the json-rpc binding
            (see :meth:`_prepareJSONRPCBinding`).
        """
        self.vmContainerLock = threading.Lock()
        self._networkSemaphore = threading.Semaphore()
        self._shutdownSemaphore = threading.Semaphore()
        self.irs = irs
        if self.irs:
            # Bind the callback through a weak proxy so the registration
            # does not keep this clientIF instance alive.
            self._contEIOVmsCB = partial(clientIF.contEIOVms, proxy(self))
            self.irs.registerDomainStateChangeCallback(self._contEIOVmsCB)
        self.log = log
        self._recovery = True
        self.channelListener = Listener(self.log)
        self._generationID = str(uuid.uuid4())
        self.mom = None
        self.bindings = {}
        self._broker_client = None
        self._subscriptions = defaultdict(list)
        self._scheduler = scheduler
        if _glusterEnabled:
            self.gluster = gapi.GlusterApi(self, log)
        else:
            self.gluster = None
        try:
            self.vmContainer = {}
            self._hostStats = sampling.HostStatsThread(sampling.host_samples)
            self._hostStats.start()
            self.lastRemoteAccess = 0
            self._enabled = True
            self._netConfigDirty = False
            self._prepareMOM()
            secret.clear()
            # VM recovery runs in a background thread; until it finishes
            # self._recovery stays True and `ready` reports False.
            concurrent.thread(self._recoverThread,
                              name='clientIFinit').start()
            self.channelListener.settimeout(
                config.getint('vars', 'guest_agent_timeout'))
            self.channelListener.start()
            self.threadLocal = threading.local()
            self.threadLocal.client = ''
            host = config.get('addresses', 'management_ip')
            port = config.getint('addresses', 'management_port')
            self._createAcceptor(host, port)
            self._prepareXMLRPCBinding()
            self._prepareJSONRPCBinding()
            self._connectToBroker()
        except:
            # Undo the storage-side initialization before re-raising so a
            # half-built clientIF does not leave the dispatcher running.
            self.log.error('failed to init clientIF, '
                           'shutting down storage dispatcher')
            if self.irs:
                self.irs.prepareForShutdown()
            raise

    def getVMs(self):
        """
        Get a snapshot of the currently registered VMs.
        Return value will be a dict of {vmUUID: VM_object}
        """
        with self.vmContainerLock:
            return self.vmContainer.copy()

    @property
    def ready(self):
        # The host is usable only once storage is up and recovery is done.
        return (self.irs is None or self.irs.ready) and not self._recovery

    def notify(self, event_id, **kwargs):
        """
        Send a notification using the provided subscription id as
        event_id and a dictionary as the event body. Before sending,
        notify_time is added at the top level of the dictionary.
        """
        notification = Notification(
            event_id,
            self._send_notification,
        )
        notification.emit(**kwargs)

    def _send_notification(self, message):
        # Deliver the serialized event on the configured event queue via
        # the json-rpc reactor.
        self.bindings['jsonrpc'].reactor.server.send(
            message, config.get('addresses', 'event_queue'))

    def contEIOVms(self, sdUUID, isDomainStateValid):
        """
        Resume VMs paused on I/O errors once storage domain sdUUID
        becomes valid again.
        """
        # This method is called everytime the onDomainStateChange
        # event is emitted, this event is emitted even when a domain goes
        # INVALID if this happens there is nothing to do
        if not isDomainStateValid:
            return

        libvirtCon = libvirtconnection.get()
        libvirtVms = libvirtCon.listAllDomains(
            libvirt.VIR_CONNECT_LIST_DOMAINS_PAUSED)

        with self.vmContainerLock:
            self.log.info("vmContainerLock acquired")
            for libvirtVm in libvirtVms:
                state = libvirtVm.state(0)
                # Only resume VMs that were paused due to an I/O error
                # and actually use the recovered storage domain.
                if state[1] == libvirt.VIR_DOMAIN_PAUSED_IOERROR:
                    vmId = libvirtVm.UUIDString()
                    vmObj = self.vmContainer[vmId]
                    if sdUUID in vmObj.sdIds:
                        self.log.info("Cont vm %s in EIO", vmId)
                        vmObj.cont()

    @classmethod
    def getInstance(cls, irs=None, log=None, scheduler=None):
        """
        Return the singleton clientIF, creating it on first use.

        :raises Exception: when the instance must be created but no
            log object was supplied.
        """
        with cls._instanceLock:
            if cls._instance is None:
                if log is None:
                    raise Exception("Logging facility is required to create "
                                    "the single clientIF instance")
                else:
                    cls._instance = clientIF(irs, log, scheduler)
        return cls._instance

    def _createAcceptor(self, host, port):
        # Single listening socket; per-protocol detectors are attached by
        # the _prepare*Binding methods.
        sslctx = sslutils.create_ssl_context()
        self._reactor = Reactor()
        self._acceptor = MultiProtocolAcceptor(self._reactor, host,
                                               port, sslctx)

    def _connectToBroker(self):
        """
        Optionally connect to the message broker and subscribe a
        StompRpcServer on each configured request queue.
        """
        if config.getboolean('vars', 'broker_enable'):
            broker_address = config.get('addresses', 'broker_address')
            broker_port = config.getint('addresses', 'broker_port')
            request_queues = config.get('addresses', 'request_queues')

            sslctx = sslutils.create_ssl_context()
            sock = socket.socket()
            sock.connect((broker_address, broker_port))
            if sslctx:
                sock = sslctx.wrapSocket(sock)

            self._broker_client = StompClient(sock, self._reactor)
            for destination in request_queues.split(","):
                self._subscriptions[destination] = StompRpcServer(
                    self.bindings['jsonrpc'].server,
                    self._broker_client,
                    destination,
                    broker_address,
                    config.getint('vars', 'connection_stats_timeout'))

    def _prepareXMLRPCBinding(self):
        # xml-rpc support is optional at runtime; a missing module is
        # logged rather than fatal.
        if config.getboolean('vars', 'xmlrpc_enable'):
            try:
                from rpc.bindingxmlrpc import BindingXMLRPC
                from rpc.bindingxmlrpc import XmlDetector
            except ImportError:
                self.log.error('Unable to load the xmlrpc server module. '
                               'Please make sure it is installed.')
            else:
                xml_binding = BindingXMLRPC(self, self.log)
                self.bindings['xmlrpc'] = xml_binding
                xml_detector = XmlDetector(xml_binding)
                self._acceptor.add_detector(xml_detector)

    def _prepareJSONRPCBinding(self):
        # json-rpc support is optional at runtime; a missing module is
        # logged rather than fatal.
        if config.getboolean('vars', 'jsonrpc_enable'):
            try:
                from rpc import Bridge
                from rpc.bindingjsonrpc import BindingJsonRpc
                from yajsonrpc.stompreactor import StompDetector
            except ImportError:
                self.log.warn('Unable to load the json rpc server module. '
                              'Please make sure it is installed.')
            else:
                bridge = Bridge.DynamicBridge()
                json_binding = BindingJsonRpc(
                    bridge, self._subscriptions,
                    config.getint('vars', 'connection_stats_timeout'),
                    self._scheduler)
                self.bindings['jsonrpc'] = json_binding
                stomp_detector = StompDetector(json_binding)
                self._acceptor.add_detector(stomp_detector)

    def _prepareMOM(self):
        # Memory Overcommit Manager client, configured from [mom] conf.
        momconf = config.get("mom", "conf")
        self.mom = MomClient(momconf)

    def prepareForShutdown(self):
        """
        Prepare server for shutdown.
        Should be called before taking server down.
        """
        # Non-blocking acquire: a second concurrent caller fails fast.
        if not self._shutdownSemaphore.acquire(blocking=False):
            self.log.debug('cannot run prepareForShutdown concurrently')
            return errCode['unavail']
        try:
            if not self._enabled:
                self.log.debug('cannot run prepareForShutdown twice')
                return errCode['unavail']

            self._acceptor.stop()
            for binding in self.bindings.values():
                binding.stop()

            self._enabled = False
            # Drop transient secrets before going down.
            secret.clear()
            self.channelListener.stop()
            self._hostStats.stop()
            if self.irs:
                return self.irs.prepareForShutdown()
            else:
                return {'status': doneCode}
        finally:
            self._shutdownSemaphore.release()

    def start(self):
        """Start all rpc bindings and the reactor's request loop."""
        for binding in self.bindings.values():
            binding.start()
        self.thread = concurrent.thread(self._reactor.process_requests,
                                        name='Reactor thread')
        self.thread.start()

    def _getUUIDSpecPath(self, uuid):
        """
        Resolve a filesystem-UUID drive spec to a device path.

        :raises vm.VolumeError: when no device with that UUID is found.
        """
        try:
            return blkid.getDeviceByUuid(uuid)
        except blkid.BlockIdException:
            self.log.info('Error finding path for device', exc_info=True)
            raise vm.VolumeError(uuid)

    def prepareVolumePath(self, drive, vmId=None):
        """
        Activate the volume behind a drive spec and return its path.

        ``drive`` may be a dict (PDIV image, GUID LUN, filesystem UUID,
        cdrom/floppy with specParams, or a plain 'path' entry) or, for
        backward compatibility, None / a plain path string.

        :raises vm.VolumeError: when the drive spec cannot be resolved.
        """
        if type(drive) is dict:
            device = drive['device']
            # PDIV drive format
            if device == 'disk' and isVdsmImage(drive):
                res = self.irs.prepareImage(drive['domainID'],
                                            drive['poolID'],
                                            drive['imageID'],
                                            drive['volumeID'])
                if res['status']['code']:
                    raise vm.VolumeError(drive)
                volPath = res['path']
                # The order of imgVolumesInfo is not guaranteed
                drive['volumeChain'] = res['imgVolumesInfo']
                drive['volumeInfo'] = res['info']

            # GUID drive format
            elif "GUID" in drive:
                res = self.irs.getDevicesVisibility([drive["GUID"]])
                if not res["visible"][drive["GUID"]]:
                    raise vm.VolumeError(drive)

                res = self.irs.appropriateDevice(drive["GUID"], vmId)
                if res['status']['code']:
                    raise vm.VolumeError(drive)

                # Update size for LUN volume
                drive["truesize"] = res['truesize']
                drive["apparentsize"] = res['apparentsize']
                volPath = res['path']

            # UUID drive format
            elif "UUID" in drive:
                volPath = self._getUUIDSpecPath(drive["UUID"])

            # cdrom and floppy drives
            elif (device in ('cdrom', 'floppy') and
                  'specParams' in drive):
                params = drive['specParams']
                if 'vmPayload' in params:
                    volPath = self._prepareVolumePathFromPayload(
                        vmId, device, params['vmPayload'])
                # next line can be removed in future, when < 3.3 engine
                # is not supported
                elif (params.get('path', '') == '' and
                      drive.get('path', '') == ''):
                    volPath = ''
                else:
                    volPath = drive.get('path', '')

            elif "path" in drive:
                volPath = drive['path']

            else:
                raise vm.VolumeError(drive)

        # For BC sake: None as argument
        elif not drive:
            volPath = drive

        # For BC sake: path as a string.
        elif os.path.exists(drive):
            volPath = drive

        else:
            raise vm.VolumeError(drive)

        self.log.info("prepared volume path: %s", volPath)
        return volPath

    def _prepareVolumePathFromPayload(self, vmId, device, payload):
        """
        param vmId: VM UUID or None
        param device: either 'floppy' or 'cdrom'
        param payload: a dict formed like this:
            {'volId': 'volume id',   # volId is optional
             'file': {'filename': 'content', ...}}
        """
        funcs = {'cdrom': 'mkIsoFs', 'floppy': 'mkFloppyFs'}
        if device not in funcs:
            raise vm.VolumeError("Unsupported 'device': %s" % device)
        # Image creation needs root privileges, so it is delegated to
        # supervdsm.
        func = getattr(supervdsm.getProxy(), funcs[device])
        return func(vmId, payload['file'], payload.get('volId'))

    def teardownVolumePath(self, drive):
        """
        Deactivate the image behind a drive spec; return the storage
        status code (doneCode for non-image drives).
        """
        res = {'status': doneCode}
        try:
            if isVdsmImage(drive):
                res = self.irs.teardownImage(drive['domainID'],
                                             drive['poolID'],
                                             drive['imageID'])
        except TypeError:
            # paths (strings) are not deactivated
            if not isinstance(drive, basestring):
                self.log.warning("Drive is not a vdsm image: %s",
                                 drive, exc_info=True)

        return res['status']['code']

    def getDiskAlignment(self, drive):
        """
        Returns the alignment of the disk partitions

        param drive:
        is either {"poolID": , "domainID": , "imageID": , "volumeID": }
        or {"GUID": }

        Return type: a dictionary with partition names as keys and
        True for aligned partitions and False for unaligned as values
        """
        aligning = {}
        volPath = self.prepareVolumePath(drive)
        try:
            out = alignmentScan.scanImage(volPath)
            for line in out:
                aligning[line.partitionName] = line.alignmentScanResult
        finally:
            # Always release the volume, even when the scan fails.
            self.teardownVolumePath(drive)

        return {'status': doneCode, 'alignment': aligning}

    def createVm(self, vmParams, vmRecover=False):
        """
        Create (or, with vmRecover=True, re-register) a VM and start it.
        Returns errCode['exist'] when the vmId is already registered.
        """
        with self.vmContainerLock:
            if not vmRecover:
                if vmParams['vmId'] in self.vmContainer:
                    return errCode['exist']
            vm = Vm(self, vmParams, vmRecover)
            self.vmContainer[vmParams['vmId']] = vm
        # run() happens outside the lock; the VM is already registered.
        vm.run()
        return {'status': doneCode, 'vmList': vm.status()}

    def getAllVmStats(self):
        """Return the stats of every registered VM."""
        return [v.getStats() for v in self.vmContainer.values()]

    def createStompClient(self, client_socket):
        """
        Wrap client_socket in a stomp client on the json-rpc reactor.

        :raises JsonRpcBindingsError: when json-rpc is not enabled.
        """
        if 'jsonrpc' in self.bindings:
            json_binding = self.bindings['jsonrpc']
            reactor = json_binding.reactor
            return reactor.createClient(client_socket)
        else:
            raise JsonRpcBindingsError()

    def _recoverThread(self):
        # Trying to run recover process until it works. During that time vdsm
        # stays in recovery mode (_recover=True), means all api requests
        # returns with "vdsm is in initializing process" message.
        utils.retry(self._recoverExistingVms, sleep=5)

    def _recoverExistingVms(self):
        """
        One recovery attempt: re-register existing VMs, wait for them
        to come up, then prepare their storage paths.
        """
        start_time = utils.monotonic_time()
        try:
            self.log.debug('recovery: started')

            # Starting up libvirt might take long when host under high load,
            # we prefer running this code in external thread to avoid blocking
            # API response.
            mog = min(config.getint('vars', 'max_outgoing_migrations'),
                      caps.CpuTopology().cores())
            migration.SourceThread.setMaxOutgoingMigrations(mog)

            recovery.all_vms(self)

            # recover stage 3: waiting for domains to go up
            self._waitForDomainsUp()

            recovery.clean_vm_files(self)

            self._recovery = False

            # Now if we have VMs to restore we should wait pool connection
            # and then prepare all volumes.
            # Actually, we need it just to get the resources for future
            # volumes manipulations
            self._waitForStoragePool()

            self._preparePathsForRecoveredVMs()

            self.log.info('recovery: completed in %is',
                          utils.monotonic_time() - start_time)
        except:
            self.log.exception("recovery: failed")
            raise

    def dispatchLibvirtEvents(self, conn, dom, *args):
        """
        Libvirt event-loop callback: route a domain event to the owning
        VM object. The event id is the last element of *args; the
        remaining elements are the event's payload.
        """
        try:
            eventid = args[-1]
            vmid = dom.UUIDString()
            v = self.vmContainer.get(vmid)

            if not v:
                # Event for a domain vdsm does not manage — ignore.
                self.log.debug('unknown vm %s eventid %s args %s',
                               vmid, eventid, args)
                return

            if eventid == libvirt.VIR_DOMAIN_EVENT_ID_LIFECYCLE:
                event, detail = args[:-1]
                v.onLibvirtLifecycleEvent(event, detail, None)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_REBOOT:
                v.onReboot()
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_RTC_CHANGE:
                utcoffset, = args[:-1]
                v.onRTCUpdate(utcoffset)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_IO_ERROR_REASON:
                srcPath, devAlias, action, reason = args[:-1]
                v.onIOError(devAlias, reason, action)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_GRAPHICS:
                phase, localAddr, remoteAddr, authScheme, subject = args[:-1]
                v.log.debug('graphics event phase '
                            '%s localAddr %s remoteAddr %s'
                            'authScheme %s subject %s',
                            phase, localAddr, remoteAddr, authScheme, subject)
                if phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE:
                    v.onConnect(remoteAddr['node'], remoteAddr['service'])
                elif phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT:
                    v.onDisconnect(clientIp=remoteAddr['node'],
                                   clientPort=remoteAddr['service'])
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_WATCHDOG:
                action, = args[:-1]
                v.onWatchdogEvent(action)
            else:
                v.log.warning('unknown eventid %s args %s', eventid, args)
        except:
            # A callback must never propagate into libvirt's event loop.
            self.log.error("Error running VM callback", exc_info=True)

    def _waitForDomainsUp(self):
        # Poll until no registered VM is still in WAIT_FOR_LAUNCH, or
        # the system is shutting down (_enabled turns False).
        while self._enabled:
            launching = sum(int(v.lastStatus == vmstatus.WAIT_FOR_LAUNCH)
                            for v in self.vmContainer.values())
            if not launching:
                break
            else:
                self.log.info('recovery: waiting for %d domains to go up',
                              launching)
                time.sleep(1)

    def _waitForStoragePool(self):
        # Only wait when there are VMs to restore and no pool is
        # connected yet; bail out on shutdown.
        while (self._enabled and self.vmContainer and
               not self.irs.getConnectedStoragePoolsList()['poollist']):
            self.log.info('recovery: waiting for storage pool to go up')
            time.sleep(5)

    def _preparePathsForRecoveredVMs(self):
        """Prepare disk paths for each recovered VM, best-effort."""
        vm_objects = self.vmContainer.values()
        num_vm_objects = len(vm_objects)
        for idx, vm_obj in enumerate(vm_objects):
            # Let's recover as much VMs as possible
            try:
                # Do not prepare volumes when system goes down
                if self._enabled:
                    self.log.info(
                        'recovery [%d/%d]: preparing paths for'
                        ' domain %s', idx + 1, num_vm_objects, vm_obj.id)
                    vm_obj.preparePaths(
                        vm_obj.devSpecMapFromConf()[hwclass.DISK])
            except:
                self.log.exception("recovery [%d/%d]: failed for vm %s",
                                   idx + 1, num_vm_objects, vm_obj.id)
# NOTE(review): this is a second, token-identical definition of clientIF —
# it shadows the definition above when the module is imported. Confirm
# whether this duplication is intentional (it looks like a merge artifact).
class clientIF(object):
    """
    The client interface of vdsm. Exposes vdsm verbs as json-rpc or
    xml-rpc functions.
    """
    # Singleton instance and the lock guarding its lazy creation.
    _instance = None
    _instanceLock = threading.Lock()

    def __init__(self, irs, log, scheduler):
        """
        Initialize the (single) clientIF instance

        :param irs: a Dispatcher object to be used as this object's irs.
        :type irs: :class:`storage.dispatcher.Dispatcher`
        :param log: a log object to be used for this object's logging.
        :type log: :class:`logging.Logger`
        :param scheduler: scheduler handed to the json-rpc binding
            (see :meth:`_prepareJSONRPCBinding`).
        """
        self.vmContainerLock = threading.Lock()
        self._networkSemaphore = threading.Semaphore()
        self._shutdownSemaphore = threading.Semaphore()
        self.irs = irs
        if self.irs:
            # Bind the callback through a weak proxy so the registration
            # does not keep this clientIF instance alive.
            self._contEIOVmsCB = partial(clientIF.contEIOVms, proxy(self))
            self.irs.registerDomainStateChangeCallback(self._contEIOVmsCB)
        self.log = log
        self._recovery = True
        self.channelListener = Listener(self.log)
        self._generationID = str(uuid.uuid4())
        self.mom = None
        self.bindings = {}
        self._broker_client = None
        self._subscriptions = defaultdict(list)
        self._scheduler = scheduler
        if _glusterEnabled:
            self.gluster = gapi.GlusterApi(self, log)
        else:
            self.gluster = None
        try:
            self.vmContainer = {}
            self._hostStats = sampling.HostStatsThread(
                sampling.host_samples)
            self._hostStats.start()
            self.lastRemoteAccess = 0
            self._enabled = True
            self._netConfigDirty = False
            self._prepareMOM()
            secret.clear()
            # VM recovery runs in a background thread; until it finishes
            # self._recovery stays True and `ready` reports False.
            concurrent.thread(self._recoverThread,
                              name='clientIFinit').start()
            self.channelListener.settimeout(
                config.getint('vars', 'guest_agent_timeout'))
            self.channelListener.start()
            self.threadLocal = threading.local()
            self.threadLocal.client = ''
            host = config.get('addresses', 'management_ip')
            port = config.getint('addresses', 'management_port')
            self._createAcceptor(host, port)
            self._prepareXMLRPCBinding()
            self._prepareJSONRPCBinding()
            self._connectToBroker()
        except:
            # Undo the storage-side initialization before re-raising so a
            # half-built clientIF does not leave the dispatcher running.
            self.log.error('failed to init clientIF, '
                           'shutting down storage dispatcher')
            if self.irs:
                self.irs.prepareForShutdown()
            raise

    def getVMs(self):
        """
        Get a snapshot of the currently registered VMs.
        Return value will be a dict of {vmUUID: VM_object}
        """
        with self.vmContainerLock:
            return self.vmContainer.copy()

    @property
    def ready(self):
        # The host is usable only once storage is up and recovery is done.
        return (self.irs is None or self.irs.ready) and not self._recovery

    def notify(self, event_id, **kwargs):
        """
        Send a notification using the provided subscription id as
        event_id and a dictionary as the event body. Before sending,
        notify_time is added at the top level of the dictionary.
        """
        notification = Notification(
            event_id,
            self._send_notification,
        )
        notification.emit(**kwargs)

    def _send_notification(self, message):
        # Deliver the serialized event on the configured event queue via
        # the json-rpc reactor.
        self.bindings['jsonrpc'].reactor.server.send(
            message, config.get('addresses', 'event_queue'))

    def contEIOVms(self, sdUUID, isDomainStateValid):
        """
        Resume VMs paused on I/O errors once storage domain sdUUID
        becomes valid again.
        """
        # This method is called everytime the onDomainStateChange
        # event is emitted, this event is emitted even when a domain goes
        # INVALID if this happens there is nothing to do
        if not isDomainStateValid:
            return

        libvirtCon = libvirtconnection.get()
        libvirtVms = libvirtCon.listAllDomains(
            libvirt.VIR_CONNECT_LIST_DOMAINS_PAUSED)

        with self.vmContainerLock:
            self.log.info("vmContainerLock acquired")
            for libvirtVm in libvirtVms:
                state = libvirtVm.state(0)
                # Only resume VMs that were paused due to an I/O error
                # and actually use the recovered storage domain.
                if state[1] == libvirt.VIR_DOMAIN_PAUSED_IOERROR:
                    vmId = libvirtVm.UUIDString()
                    vmObj = self.vmContainer[vmId]
                    if sdUUID in vmObj.sdIds:
                        self.log.info("Cont vm %s in EIO", vmId)
                        vmObj.cont()

    @classmethod
    def getInstance(cls, irs=None, log=None, scheduler=None):
        """
        Return the singleton clientIF, creating it on first use.

        :raises Exception: when the instance must be created but no
            log object was supplied.
        """
        with cls._instanceLock:
            if cls._instance is None:
                if log is None:
                    raise Exception("Logging facility is required to create "
                                    "the single clientIF instance")
                else:
                    cls._instance = clientIF(irs, log, scheduler)
        return cls._instance

    def _createAcceptor(self, host, port):
        # Single listening socket; per-protocol detectors are attached by
        # the _prepare*Binding methods.
        sslctx = sslutils.create_ssl_context()
        self._reactor = Reactor()
        self._acceptor = MultiProtocolAcceptor(self._reactor, host,
                                               port, sslctx)

    def _connectToBroker(self):
        """
        Optionally connect to the message broker and subscribe a
        StompRpcServer on each configured request queue.
        """
        if config.getboolean('vars', 'broker_enable'):
            broker_address = config.get('addresses', 'broker_address')
            broker_port = config.getint('addresses', 'broker_port')
            request_queues = config.get('addresses', 'request_queues')

            sslctx = sslutils.create_ssl_context()
            sock = socket.socket()
            sock.connect((broker_address, broker_port))
            if sslctx:
                sock = sslctx.wrapSocket(sock)

            self._broker_client = StompClient(sock, self._reactor)
            for destination in request_queues.split(","):
                self._subscriptions[destination] = StompRpcServer(
                    self.bindings['jsonrpc'].server,
                    self._broker_client,
                    destination,
                    broker_address,
                    config.getint('vars', 'connection_stats_timeout')
                )

    def _prepareXMLRPCBinding(self):
        # xml-rpc support is optional at runtime; a missing module is
        # logged rather than fatal.
        if config.getboolean('vars', 'xmlrpc_enable'):
            try:
                from rpc.bindingxmlrpc import BindingXMLRPC
                from rpc.bindingxmlrpc import XmlDetector
            except ImportError:
                self.log.error('Unable to load the xmlrpc server module. '
                               'Please make sure it is installed.')
            else:
                xml_binding = BindingXMLRPC(self, self.log)
                self.bindings['xmlrpc'] = xml_binding
                xml_detector = XmlDetector(xml_binding)
                self._acceptor.add_detector(xml_detector)

    def _prepareJSONRPCBinding(self):
        # json-rpc support is optional at runtime; a missing module is
        # logged rather than fatal.
        if config.getboolean('vars', 'jsonrpc_enable'):
            try:
                from rpc import Bridge
                from rpc.bindingjsonrpc import BindingJsonRpc
                from yajsonrpc.stompreactor import StompDetector
            except ImportError:
                self.log.warn('Unable to load the json rpc server module. '
                              'Please make sure it is installed.')
            else:
                bridge = Bridge.DynamicBridge()
                json_binding = BindingJsonRpc(
                    bridge, self._subscriptions,
                    config.getint('vars', 'connection_stats_timeout'),
                    self._scheduler)
                self.bindings['jsonrpc'] = json_binding
                stomp_detector = StompDetector(json_binding)
                self._acceptor.add_detector(stomp_detector)

    def _prepareMOM(self):
        # Memory Overcommit Manager client, configured from [mom] conf.
        momconf = config.get("mom", "conf")
        self.mom = MomClient(momconf)

    def prepareForShutdown(self):
        """
        Prepare server for shutdown.
        Should be called before taking server down.
        """
        # Non-blocking acquire: a second concurrent caller fails fast.
        if not self._shutdownSemaphore.acquire(blocking=False):
            self.log.debug('cannot run prepareForShutdown concurrently')
            return errCode['unavail']
        try:
            if not self._enabled:
                self.log.debug('cannot run prepareForShutdown twice')
                return errCode['unavail']

            self._acceptor.stop()
            for binding in self.bindings.values():
                binding.stop()

            self._enabled = False
            # Drop transient secrets before going down.
            secret.clear()
            self.channelListener.stop()
            self._hostStats.stop()
            if self.irs:
                return self.irs.prepareForShutdown()
            else:
                return {'status': doneCode}
        finally:
            self._shutdownSemaphore.release()

    def start(self):
        """Start all rpc bindings and the reactor's request loop."""
        for binding in self.bindings.values():
            binding.start()
        self.thread = concurrent.thread(self._reactor.process_requests,
                                        name='Reactor thread')
        self.thread.start()

    def _getUUIDSpecPath(self, uuid):
        """
        Resolve a filesystem-UUID drive spec to a device path.

        :raises vm.VolumeError: when no device with that UUID is found.
        """
        try:
            return blkid.getDeviceByUuid(uuid)
        except blkid.BlockIdException:
            self.log.info('Error finding path for device', exc_info=True)
            raise vm.VolumeError(uuid)

    def prepareVolumePath(self, drive, vmId=None):
        """
        Activate the volume behind a drive spec and return its path.

        ``drive`` may be a dict (PDIV image, GUID LUN, filesystem UUID,
        cdrom/floppy with specParams, or a plain 'path' entry) or, for
        backward compatibility, None / a plain path string.

        :raises vm.VolumeError: when the drive spec cannot be resolved.
        """
        if type(drive) is dict:
            device = drive['device']
            # PDIV drive format
            if device == 'disk' and isVdsmImage(drive):
                res = self.irs.prepareImage(
                    drive['domainID'], drive['poolID'],
                    drive['imageID'], drive['volumeID'])
                if res['status']['code']:
                    raise vm.VolumeError(drive)
                volPath = res['path']
                # The order of imgVolumesInfo is not guaranteed
                drive['volumeChain'] = res['imgVolumesInfo']
                drive['volumeInfo'] = res['info']

            # GUID drive format
            elif "GUID" in drive:
                res = self.irs.getDevicesVisibility([drive["GUID"]])
                if not res["visible"][drive["GUID"]]:
                    raise vm.VolumeError(drive)

                res = self.irs.appropriateDevice(drive["GUID"], vmId)
                if res['status']['code']:
                    raise vm.VolumeError(drive)

                # Update size for LUN volume
                drive["truesize"] = res['truesize']
                drive["apparentsize"] = res['apparentsize']
                volPath = res['path']

            # UUID drive format
            elif "UUID" in drive:
                volPath = self._getUUIDSpecPath(drive["UUID"])

            # cdrom and floppy drives
            elif (device in ('cdrom', 'floppy') and
                  'specParams' in drive):
                params = drive['specParams']
                if 'vmPayload' in params:
                    volPath = self._prepareVolumePathFromPayload(
                        vmId, device, params['vmPayload'])
                # next line can be removed in future, when < 3.3 engine
                # is not supported
                elif (params.get('path', '') == '' and
                      drive.get('path', '') == ''):
                    volPath = ''
                else:
                    volPath = drive.get('path', '')

            elif "path" in drive:
                volPath = drive['path']

            else:
                raise vm.VolumeError(drive)

        # For BC sake: None as argument
        elif not drive:
            volPath = drive

        # For BC sake: path as a string.
        elif os.path.exists(drive):
            volPath = drive

        else:
            raise vm.VolumeError(drive)

        self.log.info("prepared volume path: %s", volPath)
        return volPath

    def _prepareVolumePathFromPayload(self, vmId, device, payload):
        """
        param vmId: VM UUID or None
        param device: either 'floppy' or 'cdrom'
        param payload: a dict formed like this:
            {'volId': 'volume id',   # volId is optional
             'file': {'filename': 'content', ...}}
        """
        funcs = {'cdrom': 'mkIsoFs', 'floppy': 'mkFloppyFs'}
        if device not in funcs:
            raise vm.VolumeError("Unsupported 'device': %s" % device)
        # Image creation needs root privileges, so it is delegated to
        # supervdsm.
        func = getattr(supervdsm.getProxy(), funcs[device])
        return func(vmId, payload['file'], payload.get('volId'))

    def teardownVolumePath(self, drive):
        """
        Deactivate the image behind a drive spec; return the storage
        status code (doneCode for non-image drives).
        """
        res = {'status': doneCode}
        try:
            if isVdsmImage(drive):
                res = self.irs.teardownImage(drive['domainID'],
                                             drive['poolID'],
                                             drive['imageID'])
        except TypeError:
            # paths (strings) are not deactivated
            if not isinstance(drive, basestring):
                self.log.warning("Drive is not a vdsm image: %s",
                                 drive, exc_info=True)

        return res['status']['code']

    def getDiskAlignment(self, drive):
        """
        Returns the alignment of the disk partitions

        param drive:
        is either {"poolID": , "domainID": , "imageID": , "volumeID": }
        or {"GUID": }

        Return type: a dictionary with partition names as keys and
        True for aligned partitions and False for unaligned as values
        """
        aligning = {}
        volPath = self.prepareVolumePath(drive)
        try:
            out = alignmentScan.scanImage(volPath)
            for line in out:
                aligning[line.partitionName] = line.alignmentScanResult
        finally:
            # Always release the volume, even when the scan fails.
            self.teardownVolumePath(drive)

        return {'status': doneCode, 'alignment': aligning}

    def createVm(self, vmParams, vmRecover=False):
        """
        Create (or, with vmRecover=True, re-register) a VM and start it.
        Returns errCode['exist'] when the vmId is already registered.
        """
        with self.vmContainerLock:
            if not vmRecover:
                if vmParams['vmId'] in self.vmContainer:
                    return errCode['exist']
            vm = Vm(self, vmParams, vmRecover)
            self.vmContainer[vmParams['vmId']] = vm
        # run() happens outside the lock; the VM is already registered.
        vm.run()
        return {'status': doneCode, 'vmList': vm.status()}

    def getAllVmStats(self):
        """Return the stats of every registered VM."""
        return [v.getStats() for v in self.vmContainer.values()]

    def createStompClient(self, client_socket):
        """
        Wrap client_socket in a stomp client on the json-rpc reactor.

        :raises JsonRpcBindingsError: when json-rpc is not enabled.
        """
        if 'jsonrpc' in self.bindings:
            json_binding = self.bindings['jsonrpc']
            reactor = json_binding.reactor
            return reactor.createClient(client_socket)
        else:
            raise JsonRpcBindingsError()

    def _recoverThread(self):
        # Trying to run recover process until it works. During that time vdsm
        # stays in recovery mode (_recover=True), means all api requests
        # returns with "vdsm is in initializing process" message.
        utils.retry(self._recoverExistingVms, sleep=5)

    def _recoverExistingVms(self):
        """
        One recovery attempt: re-register existing VMs, wait for them
        to come up, then prepare their storage paths.
        """
        start_time = utils.monotonic_time()
        try:
            self.log.debug('recovery: started')

            # Starting up libvirt might take long when host under high load,
            # we prefer running this code in external thread to avoid blocking
            # API response.
            mog = min(config.getint('vars', 'max_outgoing_migrations'),
                      caps.CpuTopology().cores())
            migration.SourceThread.setMaxOutgoingMigrations(mog)

            recovery.all_vms(self)

            # recover stage 3: waiting for domains to go up
            self._waitForDomainsUp()

            recovery.clean_vm_files(self)

            self._recovery = False

            # Now if we have VMs to restore we should wait pool connection
            # and then prepare all volumes.
            # Actually, we need it just to get the resources for future
            # volumes manipulations
            self._waitForStoragePool()

            self._preparePathsForRecoveredVMs()

            self.log.info('recovery: completed in %is',
                          utils.monotonic_time() - start_time)
        except:
            self.log.exception("recovery: failed")
            raise

    def dispatchLibvirtEvents(self, conn, dom, *args):
        """
        Libvirt event-loop callback: route a domain event to the owning
        VM object. The event id is the last element of *args; the
        remaining elements are the event's payload.
        """
        try:
            eventid = args[-1]
            vmid = dom.UUIDString()
            v = self.vmContainer.get(vmid)

            if not v:
                # Event for a domain vdsm does not manage — ignore.
                self.log.debug('unknown vm %s eventid %s args %s',
                               vmid, eventid, args)
                return

            if eventid == libvirt.VIR_DOMAIN_EVENT_ID_LIFECYCLE:
                event, detail = args[:-1]
                v.onLibvirtLifecycleEvent(event, detail, None)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_REBOOT:
                v.onReboot()
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_RTC_CHANGE:
                utcoffset, = args[:-1]
                v.onRTCUpdate(utcoffset)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_IO_ERROR_REASON:
                srcPath, devAlias, action, reason = args[:-1]
                v.onIOError(devAlias, reason, action)
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_GRAPHICS:
                phase, localAddr, remoteAddr, authScheme, subject = args[:-1]
                v.log.debug('graphics event phase '
                            '%s localAddr %s remoteAddr %s'
                            'authScheme %s subject %s',
                            phase, localAddr, remoteAddr, authScheme, subject)
                if phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_INITIALIZE:
                    v.onConnect(remoteAddr['node'], remoteAddr['service'])
                elif phase == libvirt.VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT:
                    v.onDisconnect(clientIp=remoteAddr['node'],
                                   clientPort=remoteAddr['service'])
            elif eventid == libvirt.VIR_DOMAIN_EVENT_ID_WATCHDOG:
                action, = args[:-1]
                v.onWatchdogEvent(action)
            else:
                v.log.warning('unknown eventid %s args %s', eventid, args)
        except:
            # A callback must never propagate into libvirt's event loop.
            self.log.error("Error running VM callback", exc_info=True)

    def _waitForDomainsUp(self):
        # Poll until no registered VM is still in WAIT_FOR_LAUNCH, or
        # the system is shutting down (_enabled turns False).
        while self._enabled:
            launching = sum(int(v.lastStatus == vmstatus.WAIT_FOR_LAUNCH)
                            for v in self.vmContainer.values())
            if not launching:
                break
            else:
                self.log.info(
                    'recovery: waiting for %d domains to go up',
                    launching)
                time.sleep(1)

    def _waitForStoragePool(self):
        # Only wait when there are VMs to restore and no pool is
        # connected yet; bail out on shutdown.
        while (self._enabled and self.vmContainer and
               not self.irs.getConnectedStoragePoolsList()['poollist']):
            self.log.info('recovery: waiting for storage pool to go up')
            time.sleep(5)

    def _preparePathsForRecoveredVMs(self):
        """Prepare disk paths for each recovered VM, best-effort."""
        vm_objects = self.vmContainer.values()
        num_vm_objects = len(vm_objects)
        for idx, vm_obj in enumerate(vm_objects):
            # Let's recover as much VMs as possible
            try:
                # Do not prepare volumes when system goes down
                if self._enabled:
                    self.log.info(
                        'recovery [%d/%d]: preparing paths for'
                        ' domain %s', idx+1, num_vm_objects, vm_obj.id)
                    vm_obj.preparePaths(
                        vm_obj.devSpecMapFromConf()[hwclass.DISK])
            except:
                self.log.exception(
                    "recovery [%d/%d]: failed for vm %s",
                    idx+1, num_vm_objects, vm_obj.id)