def inquire(cls):
    """
    Ask the sanlock daemon which resources the current process owns.

    Returns the list of resource dicts produced by sanlock.inquire(), with
    the "lockspace" and "resource" values decoded from UTF-8 bytes into
    strings. When no sanlock process fd has been registered, an empty list
    is returned without contacting sanlock.

    Raises se.SanlockInquireError if sanlock.inquire() fails.

    See help(sanlock.inquire) for more info.
    """
    with cls._process_lock:
        process_fd = cls._process_fd

        # Without a process fd this process cannot own any lease.
        if process_fd is None:
            return []

        try:
            # pylint: disable=no-member
            owned = sanlock.inquire(slkfd=process_fd)
        except sanlock.SanlockException as err:
            # See acquire() on why we must panic.
            fd_closed_with_leases = (
                cls._lease_count > 0 and err.errno == errno.EPIPE)
            if fd_closed_with_leases:
                panic("Sanlock process fd was closed while "
                      "holding {} leases: {}"
                      .format(cls._lease_count, err))

            raise se.SanlockInquireError(err.errno, str(err))

    # Sanlock hands back bytes for the lockspace and resource names, while
    # the rest of the code works with strings.
    for entry in owned:
        for key in ("lockspace", "resource"):
            entry[key] = entry[key].decode("utf-8")

    return owned
def _connect(self):
    """
    Create the Super Vdsm manager, connect to it and fetch the proxy.

    A new _SuperVdsmManager is stored in self._manager and 'instance' and
    'open' are registered on it. The connection is attempted through
    function.retry() (timeout=60, tries=3); if that raises, panic() is
    called with the failure message. Finally the object returned by the
    manager's instance() is stored in self._svdsm.
    """
    self._manager = _SuperVdsmManager(address=ADDRESS, authkey=b'')
    for registered_name in ('instance', 'open'):
        self._manager.register(registered_name)

    self._log.debug("Trying to connect to Super Vdsm")
    try:
        function.retry(
            self._manager.connect,
            Exception,
            timeout=60,
            tries=3)
    except Exception as connect_error:
        panic("Connect to supervdsm service failed: %s" % connect_error)

    # pylint: disable=no-member
    self._svdsm = self._manager.instance()
def _connect(self):
    """
    Create the Super Vdsm manager, connect to it and fetch the proxy.

    A new _SuperVdsmManager is stored in self._manager and 'instance' and
    'open' are registered on it. The connection is attempted through
    function.retry() (timeout=60, tries=3); if that raises, panic() is
    called with the failure message. Finally the object returned by the
    manager's instance() is stored in self._svdsm.
    """
    # The authkey must be a byte string: on Python 3 multiprocessing
    # managers reject a text string here, while on Python 2 b'' is the
    # same object type as ''.
    self._manager = _SuperVdsmManager(address=ADDRESS, authkey=b'')
    self._manager.register('instance')
    self._manager.register('open')
    self._log.debug("Trying to connect to Super Vdsm")
    try:
        function.retry(
            self._manager.connect, Exception, timeout=60, tries=3)
    except Exception as ex:
        msg = "Connect to supervdsm service failed: %s" % ex
        panic(msg)

    # pylint: disable=no-member
    self._svdsm = self._manager.instance()
def release(self, lease):
    """
    Release a lease previously acquired through sanlock.

    lease must provide name, path and offset. On success the class-wide
    SANLock._lease_count is decremented.

    Raises se.ReleaseLockFailure if sanlock.release() fails.
    """
    self.log.info("Releasing %s", lease)

    with self._lock, SANLock._process_lock:
        try:
            sanlock.release(
                self._lockspace_name,
                lease.name.encode("utf-8"),
                [(lease.path, lease.offset)],
                slkfd=SANLock._process_fd,
            )
        except sanlock.SanlockException as err:
            # See acquire() on why we must panic.
            fd_closed_with_leases = (
                SANLock._lease_count > 0 and err.errno == errno.EPIPE)
            if fd_closed_with_leases:
                panic("Sanlock process fd was closed while "
                      "holding {} leases: {}".format(
                          SANLock._lease_count, err))

            raise se.ReleaseLockFailure(self._sdUUID, err)
        else:
            SANLock._lease_count -= 1

    self.log.info("Successfully released %s", lease)
def _check(self):
    """
    Verify that the cluster lock described by self._lease is still held.

    The resources are read with self._sd.inquireClusterLock(). A temporary
    se.SanlockInquireError is tolerated (and counted in self._errors) while
    fewer than self._max_errors have been seen; any other inquire failure
    panics. After a successful inquire the error counter is reset and the
    resources are searched for the entry matching the lease lockspace and
    resource. A match on a different disk, or no match at all, panics.
    """
    try:
        owned = self._sd.inquireClusterLock()
    except se.SanlockInquireError as err:
        if err.is_temporary() and self._errors < self._max_errors:
            # Give up for now; the next monitoring cycle checks again.
            self._errors += 1
            log.warning("Error (%s/%s) checking cluster lock %s",
                        self._errors, self._max_errors, self._lease)
            return

        panic("Error checking cluster lock {}".format(self._lease))
    except Exception:
        panic("Unexpected error checking cluster lock {}".format(
            self._lease))

    # A successful inquire clears the error counter.
    self._errors = 0

    for entry in owned:
        is_cluster_lock = (
            entry["lockspace"] == self._lease.lockspace and
            entry["resource"] == self._lease.resource)
        if not is_cluster_lock:
            continue

        # The lease must live on exactly the disk we expect.
        if entry["disks"] != [self._lease.disk]:
            panic("Invalid cluster lock disk exepcted={} actual={}".format(
                self._lease, entry))

        log.debug("Found cluster lock %s", entry)
        return

    panic("Cluster lock {} was lost".format(self._lease))
def acquire(self, hostId, lease, lvb=False):
    """
    Acquire a sanlock lease for the current process.

    hostId is used in log and error messages. lease must provide name,
    path and offset. lvb is forwarded to sanlock.acquire() when the
    installed sanlock supports it (supports_lvb).

    If no sanlock process fd is registered yet, one is registered first.
    If sanlock reports EPIPE while no leases are held, the process fd is
    dropped and the whole sequence is retried. On success the class-wide
    SANLock._lease_count is incremented.

    Raises:
        se.UnsupportedOperation: lvb requested but not supported.
        se.AcquireHostIdFailure: the host id was not acquired within
            ACQUIRE_HOST_ID_TIMEOUT.
        se.AcquireLockFailure: registering to sanlock or acquiring the
            lease failed.
    """
    if lvb and not supports_lvb:
        raise se.UnsupportedOperation(
            "This sanlock version does not support LVB")

    self.log.info("Acquiring %s for host id %s, lvb=%s", lease, hostId, lvb)

    # If this thread acquired the host id, the wait returns at once. If the
    # domain monitor is acquiring it asynchronously, block until the
    # monitor finds that the host id was acquired.
    #
    # IMPORTANT: This must happen *before* taking the lock. Once we hold
    # the lock the domain monitor cannot check whether the host id was
    # acquired, because hasHostId() uses the same lock.
    host_id_ready = self._ready.wait(self.ACQUIRE_HOST_ID_TIMEOUT)
    if not host_id_ready:
        raise se.AcquireHostIdFailure(
            "Timeout acquiring host id, cannot acquire %s (id=%s)"
            % (lease, hostId))

    with self._lock, SANLock._process_lock:
        while True:
            if SANLock._process_fd is None:
                try:
                    SANLock._process_fd = sanlock.register()
                except sanlock.SanlockException as err:
                    raise se.AcquireLockFailure(
                        self._sdUUID, err.errno,
                        "Cannot register to sanlock", str(err))

                self.log.info("Using sanlock process fd %d",
                              SANLock._process_fd)

            # TODO: remove once sanlock 3.8.3 is available on centos.
            if supports_lvb:
                extra_args = {"lvb": lvb}
            else:
                extra_args = {}

            try:
                sanlock.acquire(
                    self._lockspace_name,
                    lease.name.encode("utf-8"),
                    [(lease.path, lease.offset)],
                    slkfd=SANLock._process_fd,
                    **extra_args)
            except sanlock.SanlockException as err:
                if err.errno != errno.EPIPE:
                    raise se.AcquireLockFailure(
                        self._sdUUID, err.errno,
                        "Cannot acquire %s" % (lease,), str(err))

                # Sanlock releases all process leases when the process fd
                # is closed, so any lease we held is gone. Panic is the
                # only recovery: child processes run by vdsm will be
                # killed, and vdsm will lose the SPM role.
                if SANLock._lease_count > 0:
                    panic("Sanlock process fd was closed while "
                          "holding {} leases: {}"
                          .format(SANLock._lease_count, err))

                # No lease was lost; forget the dead fd and go around
                # again so a fresh one is registered.
                self.log.warning("Sanlock process fd was closed: %s", err)
                SANLock._process_fd = None
            else:
                SANLock._lease_count += 1
                break

    self.log.info("Successfully acquired %s for host id %s", lease, hostId)
def serve_clients(log):
    """
    Start the vdsm services, wait for SIGTERM, then shut them down.

    Installs handlers for SIGTERM (stop the wait loop) and SIGUSR1 (stop
    SPM on the first connected storage pool, when IRS is initialized),
    starts the services in order, and loops on sigutils.wait_for_signal()
    until the SIGTERM handler clears the running flag. Services started
    before the wait loop are stopped in the inner finally block; the
    libvirt event loop is always stopped last.
    """
    cif = None
    irs = None
    scheduler = None
    # One-element list so the nested handler can flip the flag without
    # rebinding a name in the enclosing scope.
    keep_running = [True]

    def on_sigterm(signum, frame):
        log.info("Received signal %s, shutting down" % signum)
        keep_running[0] = False

    def on_sigusr1(signum, frame):
        if not irs:
            return

        log.info("Received signal %s, stopping SPM" % signum)
        # pylint: disable=no-member
        # TODO remove when side effect removed from HSM.__init__ and
        # initialize it in line #63
        irs.spmStop(irs.getConnectedStoragePoolsList()['poollist'][0])

    sigutils.register()
    signal.signal(signal.SIGTERM, on_sigterm)
    signal.signal(signal.SIGUSR1, on_sigusr1)
    zombiereaper.registerSignalHandler()

    profile.start()
    metrics.start()

    libvirtconnection.start_event_loop()

    try:
        irs_enabled = config.getboolean('irs', 'irs_enable')
        if irs_enabled:
            try:
                irs = Dispatcher(HSM())
            except:
                # NOTE(review): the bare except also traps SystemExit and
                # KeyboardInterrupt; presumably deliberate so that any
                # failure here panics - confirm before narrowing.
                panic("Error initializing IRS")

        scheduler = schedule.Scheduler(
            name="vdsm.Scheduler",
            clock=time.monotonic_time)
        scheduler.start()

        from vdsm.clientIF import clientIF  # must import after config is read
        cif = clientIF.getInstance(irs, log, scheduler)

        jobs.start(scheduler, cif)

        install_manhole(dict(irs=irs, cif=cif))

        cif.start()

        init_unprivileged_network_components(cif)

        periodic.start(cif, scheduler)
        health.start()
        try:
            while keep_running[0]:
                sigutils.wait_for_signal()

            profile.stop()
        finally:
            metrics.stop()
            health.stop()
            periodic.stop()
            cif.prepareForShutdown()
            jobs.stop()
            scheduler.stop()
    finally:
        libvirtconnection.stop_event_loop(wait=False)
def handle_close(self, dispatcher):
    """
    Panic when the listen socket is reported closed.

    dispatcher.socket is included in the panic message.
    """
    # There is no way to recover from this here, so die loudly.
    closed_socket = dispatcher.socket
    panic.panic("Listen socket was closed: %s" % closed_socket)
"""
Helper script that moves itself into a new process group and then calls
panic.panic().

NOTE(review): presumably started as a child process by a test that checks
the effect of panic - verify against the caller.
"""
from __future__ import absolute_import
from __future__ import division

import os

from vdsm.common import panic

# Create new process group so panic will not kill the test runner.
# os.setpgid(0, 0) makes the calling process the leader of a new group.
os.setpgid(0, 0)

# Trigger the panic with a fixed, recognizable message.
panic.panic("panic test")
def serve_clients(log):
    """
    Start the vdsm services, wait for SIGTERM, then shut them down.

    Installs handlers for SIGTERM (stop the wait loop) and SIGUSR1 (stop
    SPM on the first connected storage pool, when IRS is initialized),
    starts the services in order, and loops on sigutils.wait_for_signal()
    until the SIGTERM handler clears the running flag. Services started
    before the wait loop are stopped in the inner finally block; the
    libvirt event loop is always stopped last.
    """
    cif = None
    irs = None
    scheduler = None
    # One-element list so the nested handler can flip the flag without
    # rebinding a name in the enclosing scope.
    keep_running = [True]

    def on_sigterm(signum, frame):
        log.info("Received signal %s, shutting down" % signum)
        keep_running[0] = False

    def on_sigusr1(signum, frame):
        if not irs:
            return

        log.info("Received signal %s, stopping SPM" % signum)
        # pylint: disable=no-member
        # TODO remove when side effect removed from HSM.__init__ and
        # initialize it in line #63
        irs.spmStop(
            irs.getConnectedStoragePoolsList()['poollist'][0])

    sigutils.register()
    signal.signal(signal.SIGTERM, on_sigterm)
    signal.signal(signal.SIGUSR1, on_sigusr1)
    zombiereaper.registerSignalHandler()

    profile.start()
    metrics.start()

    libvirtconnection.start_event_loop()

    try:
        irs_enabled = config.getboolean('irs', 'irs_enable')
        if irs_enabled:
            try:
                irs = Dispatcher(HSM())
            except:
                # NOTE(review): the bare except also traps SystemExit and
                # KeyboardInterrupt; presumably deliberate so that any
                # failure here panics - confirm before narrowing.
                panic("Error initializing IRS")

        scheduler = schedule.Scheduler(
            name="vdsm.Scheduler",
            clock=time.monotonic_time)
        scheduler.start()

        from vdsm.clientIF import clientIF  # must import after config is read
        cif = clientIF.getInstance(irs, log, scheduler)

        jobs.start(scheduler, cif)

        install_manhole(dict(irs=irs, cif=cif))

        cif.start()

        init_unprivileged_network_components(cif, supervdsm.getProxy())

        periodic.start(cif, scheduler)
        health.start()
        try:
            while keep_running[0]:
                sigutils.wait_for_signal()

            profile.stop()
        finally:
            metrics.stop()
            health.stop()
            periodic.stop()
            cif.prepareForShutdown()
            jobs.stop()
            scheduler.stop()
    finally:
        libvirtconnection.stop_event_loop(wait=False)