Example #1
    def inquire(cls):
        """
        Inquire the sanlock daemon and return a list of resource dicts
        owned by the current process.

        See help(sanlock.inquire) for more info.
        """
        with cls._process_lock:
            # If we don't have a process fd, we cannot have any lease.
            if cls._process_fd is None:
                return []

            try:
                # pylint: disable=no-member
                resources = sanlock.inquire(slkfd=cls._process_fd)
            except sanlock.SanlockException as e:
                # See acquire() for why we must panic.
                if cls._lease_count > 0 and e.errno == errno.EPIPE:
                    panic("Sanlock process fd was closed while "
                          "holding {} leases: {}"
                          .format(cls._lease_count, e))

                raise se.SanlockInquireError(e.errno, str(e))

            # Sanlock requires bytes values for lockspace and resource names,
            # but we work internally with strings.
            for r in resources:
                r["lockspace"] = r["lockspace"].decode("utf-8")
                r["resource"] = r["resource"].decode("utf-8")

            return resources
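
A hypothetical caller can use the decoded resource dicts directly, for example to check whether the current process still holds a given lease. A minimal sketch, assuming the classmethod above is defined on the SANLock class seen in the later examples (holds_lease() is a hypothetical helper, not part of vdsm):

# Hypothetical helper: does the current process hold this lease?
def holds_lease(lockspace, resource):
    for r in SANLock.inquire():
        if r["lockspace"] == lockspace and r["resource"] == resource:
            return True
    return False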
Example #2
File: supervdsm.py  Project: nirs/vdsm
    def _connect(self):
        self._manager = _SuperVdsmManager(address=ADDRESS, authkey=b'')
        self._manager.register('instance')
        self._manager.register('open')
        self._log.debug("Trying to connect to Super Vdsm")
        try:
            function.retry(
                self._manager.connect, Exception, timeout=60, tries=3)
        except Exception as ex:
            msg = "Connect to supervdsm service failed: %s" % ex
            panic(msg)

        # pylint: disable=no-member
        self._svdsm = self._manager.instance()
Example #3
File: supervdsm.py  Project: xin49/vdsm
    def _connect(self):
        self._manager = _SuperVdsmManager(address=ADDRESS, authkey='')
        self._manager.register('instance')
        self._manager.register('open')
        self._log.debug("Trying to connect to Super Vdsm")
        try:
            function.retry(self._manager.connect,
                           Exception,
                           timeout=60,
                           tries=3)
        except Exception as ex:
            msg = "Connect to supervdsm service failed: %s" % ex
            panic(msg)

        # pylint: disable=no-member
        self._svdsm = self._manager.instance()
Example #4
    def release(self, lease):
        self.log.info("Releasing %s", lease)

        with self._lock, SANLock._process_lock:
            try:
                sanlock.release(self._lockspace_name,
                                lease.name.encode("utf-8"),
                                [(lease.path, lease.offset)],
                                slkfd=SANLock._process_fd)
            except sanlock.SanlockException as e:
                # See acquire() for why we must panic.
                if SANLock._lease_count > 0 and e.errno == errno.EPIPE:
                    panic("Sanlock process fd was closed while "
                          "holding {} leases: {}".format(
                              SANLock._lease_count, e))

                raise se.ReleaseLockFailure(self._sdUUID, e)

            SANLock._lease_count -= 1

        self.log.info("Successfully released %s", lease)
Example #5
    def _check(self):
        try:
            resources = self._sd.inquireClusterLock()
        except se.SanlockInquireError as e:
            if e.is_temporary():
                if self._errors < self._max_errors:
                    # We will check again later in the next monitoring
                    # cycle.
                    self._errors += 1
                    log.warning("Error (%s/%s) checking cluster lock %s",
                                self._errors, self._max_errors, self._lease)
                    return

            panic("Error checking cluster lock {}".format(self._lease))
        except Exception:
            panic("Unexpected error checking cluster lock {}".format(
                self._lease))

        # Reset errors on successful inquire.
        self._errors = 0

        for r in resources:
            if r["lockspace"] != self._lease.lockspace:
                continue

            if r["resource"] != self._lease.resource:
                continue

            # Validate the cluster lease.

            if r["disks"] != [self._lease.disk]:
                panic("Invalid cluster lock disk exepcted={} actual={}".format(
                    self._lease, r))

            log.debug("Found cluster lock %s", r)
            return

        panic("Cluster lock {} was lost".format(self._lease))
Example #6
    def acquire(self, hostId, lease, lvb=False):
        if lvb and not supports_lvb:
            raise se.UnsupportedOperation(
                "This sanlock version does not support LVB")

        self.log.info("Acquiring %s for host id %s, lvb=%s",
                      lease, hostId, lvb)

        # If host id was acquired by this thread, this will return immediately.
        # If host id is being acquired asynchronously by the domain monitor,
        # wait until the domain monitor finds that the host id was acquired.
        #
        # IMPORTANT: This must be done *before* entering the lock. Once we
        # enter the lock, the domain monitor cannot check if host id was
        # acquired, since hasHostId() uses the same lock.
        if not self._ready.wait(self.ACQUIRE_HOST_ID_TIMEOUT):
            raise se.AcquireHostIdFailure(
                "Timeout acquiring host id, cannot acquire %s (id=%s)"
                % (lease, hostId))

        with self._lock, SANLock._process_lock:
            while True:
                if SANLock._process_fd is None:
                    try:
                        SANLock._process_fd = sanlock.register()
                    except sanlock.SanlockException as e:
                        raise se.AcquireLockFailure(
                            self._sdUUID, e.errno,
                            "Cannot register to sanlock", str(e))

                    self.log.info("Using sanlock process fd %d",
                                  SANLock._process_fd)

                # TODO: remove once sanlock 3.8.3 is available on centos.
                extra_args = {"lvb": lvb} if supports_lvb else {}

                try:
                    sanlock.acquire(
                        self._lockspace_name,
                        lease.name.encode("utf-8"),
                        [(lease.path, lease.offset)],
                        slkfd=SANLock._process_fd,
                        **extra_args)
                except sanlock.SanlockException as e:
                    if e.errno != errno.EPIPE:
                        raise se.AcquireLockFailure(
                            self._sdUUID, e.errno,
                            "Cannot acquire %s" % (lease,), str(e))

                    # If we hold leases, we just lost them, since sanlock
                    # releases all process leases when the process fd is
                    # closed. The only way to recover is to panic; child
                    # processes run by vdsm will be killed, and vdsm will lose
                    # the SPM role.
                    if SANLock._lease_count > 0:
                        panic("Sanlock process fd was closed while "
                              "holding {} leases: {}"
                              .format(SANLock._lease_count, e))

                    self.log.warning("Sanlock process fd was closed: %s", e)
                    SANLock._process_fd = None
                    continue

                SANLock._lease_count += 1
                break

        self.log.info("Successfully acquired %s for host id %s", lease, hostId)
Example #7
File: vdsmd.py  Project: kkoojjyy/vdsm
def serve_clients(log):
    cif = None
    irs = None
    scheduler = None
    running = [True]

    def sigtermHandler(signum, frame):
        log.info("Received signal %s, shutting down" % signum)
        running[0] = False

    def sigusr1Handler(signum, frame):
        if irs:
            log.info("Received signal %s, stopping SPM" % signum)
            # pylint: disable=no-member
            # TODO remove when side effect removed from HSM.__init__ and
            # initialize it in line #63
            irs.spmStop(irs.getConnectedStoragePoolsList()['poollist'][0])

    sigutils.register()
    signal.signal(signal.SIGTERM, sigtermHandler)
    signal.signal(signal.SIGUSR1, sigusr1Handler)
    zombiereaper.registerSignalHandler()

    profile.start()
    metrics.start()

    libvirtconnection.start_event_loop()

    try:
        if config.getboolean('irs', 'irs_enable'):
            try:
                irs = Dispatcher(HSM())
            except:
                panic("Error initializing IRS")

        scheduler = schedule.Scheduler(name="vdsm.Scheduler",
                                       clock=time.monotonic_time)
        scheduler.start()

        from vdsm.clientIF import clientIF  # must import after config is read
        cif = clientIF.getInstance(irs, log, scheduler)

        jobs.start(scheduler, cif)

        install_manhole({'irs': irs, 'cif': cif})

        cif.start()

        init_unprivileged_network_components(cif)

        periodic.start(cif, scheduler)
        health.start()
        try:
            while running[0]:
                sigutils.wait_for_signal()

            profile.stop()
        finally:
            metrics.stop()
            health.stop()
            periodic.stop()
            cif.prepareForShutdown()
            jobs.stop()
            scheduler.stop()
    finally:
        libvirtconnection.stop_event_loop(wait=False)
Example #8
    def handle_close(self, dispatcher):
        # We cannot handle this, so the best way is to die loudly.
        panic.panic("Listen socket was closed: %s" % dispatcher.socket)
Example #9
from __future__ import absolute_import
from __future__ import division
import os

from vdsm.common import panic

# Create new process group so panic will not kill the test runner.
os.setpgid(0, 0)

panic.panic("panic test")
Example #10
File: vdsmd.py  Project: nirs/vdsm
def serve_clients(log):
    cif = None
    irs = None
    scheduler = None
    running = [True]

    def sigtermHandler(signum, frame):
        log.info("Received signal %s, shutting down" % signum)
        running[0] = False

    def sigusr1Handler(signum, frame):
        if irs:
            log.info("Received signal %s, stopping SPM" % signum)
            # pylint: disable=no-member
            # TODO remove when side effect removed from HSM.__init__ and
            # initialize it in line #63
            irs.spmStop(
                irs.getConnectedStoragePoolsList()['poollist'][0])

    sigutils.register()
    signal.signal(signal.SIGTERM, sigtermHandler)
    signal.signal(signal.SIGUSR1, sigusr1Handler)
    zombiereaper.registerSignalHandler()

    profile.start()
    metrics.start()

    libvirtconnection.start_event_loop()

    try:
        if config.getboolean('irs', 'irs_enable'):
            try:
                irs = Dispatcher(HSM())
            except:
                panic("Error initializing IRS")

        scheduler = schedule.Scheduler(name="vdsm.Scheduler",
                                       clock=time.monotonic_time)
        scheduler.start()

        from vdsm.clientIF import clientIF  # must import after config is read
        cif = clientIF.getInstance(irs, log, scheduler)

        jobs.start(scheduler, cif)

        install_manhole({'irs': irs, 'cif': cif})

        cif.start()

        init_unprivileged_network_components(cif, supervdsm.getProxy())

        periodic.start(cif, scheduler)
        health.start()
        try:
            while running[0]:
                sigutils.wait_for_signal()

            profile.stop()
        finally:
            metrics.stop()
            health.stop()
            periodic.stop()
            cif.prepareForShutdown()
            jobs.stop()
            scheduler.stop()
    finally:
        libvirtconnection.stop_event_loop(wait=False)