Пример #1
0
    def __init__(self, fs, server, index, dev, jdev=None, group=None,
                 tag=None, enabled=True, mode='managed', network=None,
                 active='yes'):
        """
        Build a Lustre target for filesystem `fs`, hosted on `server`.

        `index` is mandatory and stored as an integer. `dev` is handed to
        Disk.__init__() and `jdev`, when set, is wrapped in a Journal.
        `enabled`, `mode` and `active` are forwarded to Component.__init__();
        `group`, `tag` and `network` are stored unchanged.
        """
        Disk.__init__(self, dev)
        # Per-node state table, created before Component.__init__() is run
        # (presumably because that initializer assigns a state -- confirm).
        self._states = {}
        Component.__init__(self, fs, server, enabled, mode, active)

        # Default server the target runs on, and its failover servers
        self.defaultserver = server
        self.failservers = ServerGroup()

        assert index is not None
        self.index = int(index)
        self.group, self.tag, self.network = group, tag, network
        self.mntdev = self.dev
        self.recov_info = None

        # Optional journal device
        self.journal = Journal(self, jdev) if jdev else None

        # Mode and activity may force the initial state. The inactive check
        # comes last, so INACTIVE is the value assigned last when both hold.
        if self.is_external():
            self.state = EXTERNAL
        if not self.is_active():
            self.state = INACTIVE
Пример #2
0
 def testSelect(self):
     """ServerGroup.select() with foo[1,3] yields foo1 then foo3"""
     first = Server('foo1', ['foo1@tcp'])
     second = Server('foo2', ['foo2@tcp'])
     third = Server('foo3', ['foo3@tcp'])
     group = ServerGroup([first, second, third])
     # foo2 is not part of the nodeset, so it must not be selected
     selection = group.select(NodeSet("foo[1,3]"))
     self.assertEqual([srv for srv in selection], [first, third])
Пример #3
0
 def testSelect(self):
     """ServerGroup.select() with foo[1,3] yields foo1 then foo3"""
     first = Server("foo1", ["foo1@tcp"])
     second = Server("foo2", ["foo2@tcp"])
     third = Server("foo3", ["foo3@tcp"])
     group = ServerGroup([first, second, third])
     # foo2 is not part of the nodeset, so it must not be selected
     selection = group.select(NodeSet("foo[1,3]"))
     self.assertEqual([srv for srv in selection], [first, third])
Пример #4
0
    def testSimple(self):
        """A new ServerGroup is empty; append() makes the server reachable"""
        group = ServerGroup()
        self.assertEqual(len(group), 0)

        server = Server('foo', ['foo@tcp'])
        group.append(server)
        # The appended server is counted and available by index
        self.assertEqual(len(group), 1)
        self.assertEqual(group[0], server)
Пример #5
0
    def testSimple(self):
        """A new ServerGroup is empty; append() makes the server reachable"""
        group = ServerGroup()
        self.assertEqual(len(group), 0)

        server = Server("foo", ["foo@tcp"])
        group.append(server)
        # The appended server is counted and available by index
        self.assertEqual(len(group), 1)
        self.assertEqual(group[0], server)
Пример #6
0
    def testDistant(self):
        """test ServerGroup.distant()"""
        # Short local hostname: the server built from it stands for the
        # local node, which distant() is expected to leave out.
        shortname = socket.gethostname().split('.', 1)[0]

        srv1 = Server(shortname, ['%s@tcp' % shortname])
        srv2 = Server('foo', ['foo@tcp'])
        grp = ServerGroup([srv1, srv2])
        subgrp = grp.distant()
        # Only the non-local server must remain
        self.assertEqual(list(iter(subgrp)), [srv2])
Пример #7
0
    def testDistant(self):
        """test ServerGroup.distant()"""
        # Short local hostname: the server built from it stands for the
        # local node, which distant() is expected to leave out.
        shortname = socket.gethostname().split(".", 1)[0]

        srv1 = Server(shortname, ["%s@tcp" % shortname])
        srv2 = Server("foo", ["foo@tcp"])
        grp = ServerGroup([srv1, srv2])
        subgrp = grp.distant()
        # Only the non-local server must remain
        self.assertEqual(list(iter(subgrp)), [srv2])
Пример #8
0
 def allservers(self):
     """
     Build the group of every server this target can run on.

     The default server comes first, followed by the failover servers.
     """
     #XXX: This method could be possibly dropped if the code in Status
     #     command is optimized.
     servers = ServerGroup([self.defaultserver])
     for failsrv in self.failservers:
         servers.append(failsrv)
     return servers
Пример #9
0
    def test_start_failover(self):
        """start on a failover node"""
        # Extra server, declared below as a failover server of the MDT
        srv2 = Server('fakenode', ['127.0.0.2@tcp'])

        mgt = self.fs.new_target(self.srv1, 'mgt', 0, self.disk1.name)
        mdt = self.fs.new_target(self.srv1, 'mdt', 0, self.disk2.name)
        mdt.add_server(srv2)
        # Formatting must report OFFLINE as the only resulting state
        self.assertEqual(self.fs.format(), set([OFFLINE]))

        # For a simpler test environment, simulate local node is the failover
        # node.
        # This could be improved when --servicenode will be supported. Format
        # will be possible directly in failover configuration (no need to
        # reconfig anymore).
        mdt.state = None
        # Swap the roles: srv2 becomes the default server and srv1 is
        # registered again as the only failover server.
        mdt.defaultserver = srv2
        mdt.failservers = ServerGroup()
        mdt.add_server(self.srv1)

        # Fail over this local node (-F HOSTNAME -n HOSTNAME)
        mdt.failover(self.srv1.hostname)
        # NOTE(review): presumably keeps any action from being run on the
        # fake node -- confirm against Server.action_enabled.
        srv2.action_enabled = False

        # Start should succeed and detect migration
        self.assertEqual(self.fs.start(), set([MIGRATED]))
        self.assertEqual(mgt.state, MOUNTED)
        self.assertEqual(mdt.state, MIGRATED)
Пример #10
0
 def testNodeSet(self):
     """ServerGroup.nodeset() of foo1 and foo2 equals NodeSet foo[1-2]"""
     first = Server('foo1', ['foo1@tcp'])
     second = Server('foo2', ['foo2@tcp'])
     group = ServerGroup([first, second])
     expected = NodeSet('foo[1-2]')
     self.assertEqual(group.nodeset(), expected)
Пример #11
0
 def testIter(self):
     """Iterating a ServerGroup yields its servers in constructor order"""
     first = Server('foo1', ['foo1@tcp'])
     second = Server('foo2', ['foo2@tcp'])
     group = ServerGroup([first, second])
     self.assertEqual([srv for srv in iter(group)], [first, second])
Пример #12
0
class Target(Component, Disk):

    #
    # Text form for the different target states.
    #
    # Could be nearly merged with Target state_text_map if MOUNTED value
    # becomes the same.
    STATE_TEXT_MAP = { 
        None:          "unknown",
        EXTERNAL:      "external", 
        RECOVERING:    "recovering", 
        OFFLINE:       "offline", 
        TARGET_ERROR:  "ERROR", 
        MOUNTED:       "online", 
        RUNTIME_ERROR: "CHECK FAILURE",
        INACTIVE:      "inactive",
        MIGRATED:      "migrated"
    }

    def __init__(self, fs, server, index, dev, jdev=None, group=None,
                 tag=None, enabled=True, mode='managed', network=None,
                 active='yes'):
        """
        Build a Lustre target for filesystem `fs`, hosted on `server`.

        `index` is mandatory and stored as an integer. `dev` is handed to
        Disk.__init__() and `jdev`, when set, is wrapped in a Journal.
        `enabled`, `mode` and `active` are forwarded to Component.__init__();
        `group`, `tag` and `network` are stored unchanged.
        """
        Disk.__init__(self, dev)
        # Per-node state table, created before Component.__init__() is run
        # (presumably because that initializer assigns a state -- confirm).
        self._states = {}
        Component.__init__(self, fs, server, enabled, mode, active)

        # Default server the target runs on, and its failover servers
        self.defaultserver = server
        self.failservers = ServerGroup()

        assert index is not None
        self.index = int(index)
        self.group, self.tag, self.network = group, tag, network
        self.mntdev = self.dev
        self.recov_info = None

        # Optional journal device
        self.journal = Journal(self, jdev) if jdev else None

        # Mode and activity may force the initial state. The inactive check
        # comes last, so INACTIVE is the value assigned last when both hold.
        if self.is_external():
            self.state = EXTERNAL
        if not self.is_active():
            self.state = INACTIVE

    @property
    def label(self):
        """Target label: <fs name>-<TYPE in upper case><index as 4 hex digits>."""
        fields = (self.fs.fs_name, self.TYPE.upper(), self.index)
        return "%s-%s%04x" % fields

    def __lt__(self, other):
        """Order targets by comparing their START_ORDER attributes."""
        mine, theirs = self.START_ORDER, other.START_ORDER
        return mine < theirs

    def uniqueid(self):
        """
        Return the string identifying this target: its label.
        """
        # The identifier is needed when the target is added to a filesystem.
        # It cannot be derived from the target servers list, because that
        # list may change when add_server() is called.
        identifier = self.label
        return identifier

    def get_state(self):
        """
        Compute target global state based on remote nodes results.

        Per-node states are grouped by value and reduced to a single state,
        by decreasing priority:
          - None when every recorded node state is None;
          - INACTIVE when at least one node reports it;
          - when a node is MOUNTED: TARGET_ERROR if several nodes are
            MOUNTED or another one is RECOVERING, MOUNTED if the node is the
            default server, MIGRATED otherwise;
          - when a node is RECOVERING: TARGET_ERROR if several nodes are,
            RECOVERING otherwise;
          - OFFLINE, then TARGET_ERROR, then RUNTIME_ERROR;
          - None when nothing above matches.
        """
        # Group node names by state. A plain dictionary is used instead of
        # sorted()/groupby(): states mix None with state constants, and
        # such values cannot be ordered on Python 3. items() works on both
        # Python 2 and 3.
        sdict = {}
        for node, state in self._states.items():
            sdict.setdefault(state, []).append(node)

        if None in sdict and len(sdict[None]) == len(self._states):
            return None

        if INACTIVE in sdict:
            return INACTIVE

        if MOUNTED in sdict:
            if len(sdict[MOUNTED]) > 1 or RECOVERING in sdict:
                return TARGET_ERROR
            if str(self.defaultserver.hostname) in sdict[MOUNTED]:
                return MOUNTED
            return MIGRATED

        if RECOVERING in sdict:
            # MOUNTED cannot be present here, it is handled just above.
            if len(sdict[RECOVERING]) > 1:
                return TARGET_ERROR
            return RECOVERING

        for state in (OFFLINE, TARGET_ERROR, RUNTIME_ERROR):
            if state in sdict:
                return state

        # NOTE(review): other states (e.g. EXTERNAL alone) end up here and
        # yield None, as before -- confirm this is intended.
        return None

    def set_state(self, value):
        """Record `value` as the target state for its current server."""
        hostname = str(self.server.hostname)
        self._states[hostname] = value

    state = property(get_state, set_state)

    def get_local_state(self):
        """
        Return the target state recorded for the local server, or None when
        the filesystem has no local server.
        """
        local_srv = self.fs.local_server
        if local_srv is not None:
            return self._states[str(local_srv.hostname)]
        return None

    def set_local_state(self, value):
        """
        Record `value` as the target state for the local server. Nothing is
        stored when the filesystem has no local server.
        """
        local_srv = self.fs.local_server
        if local_srv is None:
            return
        self._states[str(local_srv.hostname)] = value

    local_state = property(get_local_state, set_local_state)

    def sanitize_state(self, nodes=None):
        """
        Clean component state if it is wrong.

        For each node name in `nodes`, a state still unknown (None) is
        replaced by RUNTIME_ERROR. When `nodes` is None (the default),
        nothing is changed. A node name with no recorded state raises
        KeyError.
        """
        if nodes is None:
            # Iterating over the None default would raise TypeError.
            return

        for nodename in nodes:
            if self._states[nodename] is None:
                self._states[nodename] = RUNTIME_ERROR

    def update(self, other):
        """
        Refresh this target from `other`, the same target as seen by a
        distant node: disk fields, the state reported for that node's
        server, recovery information and index.
        """
        Disk.update(self, other)
        # We used to call Component.update(). Be careful if it is updated.
        remote = str(other.server.hostname)
        remote_state = other._states[remote]
        self._states[remote] = remote_state
        if remote_state == RECOVERING:
            # Compat v0.910: 'recov_info' value depends on remote version
            fallback = getattr(other, 'status_info', None)
            self.recov_info = getattr(other, 'recov_info', fallback)
        self.index = other.index

        # A pre shine 1.5 object carries the '_compat' flag: report it.
        if getattr(other, '_compat', False) is True:
            msg = ("WARNING: shine version mismatch !!!\n"
                   "\tPartial results may show up.\n"
                   "\tMigrated targets may not be detected.\n"
                   "\tTo avoid this, please synchronize shine versions.")
            self.fs._handle_shine_proxy_error(remote, msg)

    def add_server(self, server):
        """
        Register `server` as a failover server; its target state starts as
        unknown (None).
        """
        assert isinstance(server, Server)
        self.failservers.append(server)
        hostname = str(server.hostname)
        self._states[hostname] = None

    def update_server(self):
        """
        Compute and set component's server based on remote nodes results.

        If the component is started, server is the one on which it is started.
        If not started but a state is available on only one server, use it.

        Return False if target is started more than once, True otherwise.
        """
        # items() is used rather than iteritems(), so that this also runs
        # on Python 3.
        started = [name for name, state in self._states.items()
                   if state in (MOUNTED, RECOVERING)]
        if len(started) > 1:
            return False

        if started:
            candidates = started
        else:
            # Not started anywhere: consider servers with a known state
            candidates = [name for name, state in self._states.items()
                          if state is not None]
            # Maybe we should do something if len(candidates) > 1?

        if len(candidates) == 1:
            nodes = NodeSet(candidates[0])
            self.server = self.allservers().select(nodes)[0]

        return True

    def allservers(self):
        """
        Build the group of every server this target can run on.

        The default server comes first, followed by the failover servers.
        """
        #XXX: This method could be possibly dropped if the code in Status
        #     command is optimized.
        servers = ServerGroup([self.defaultserver])
        for failsrv in self.failservers:
            servers.append(failsrv)
        return servers

    def failover(self, candidates):
        """
        Switch the target current server to the failover server designated
        by `candidates`.

        The failover servers are filtered with `candidates`. Exactly one
        match becomes the current server and True is returned. No match
        returns False. Several matches raise ComponentError.
        """
        matches = self.failservers.select(candidates)
        count = len(matches)

        # Several possible failover nodes: the request is ambiguous
        if count > 1:
            raise ComponentError(self, "More than one failover server matches.")

        if count != 1:
            return False

        self.server = matches[0]
        return True


    def get_id(self):
        """
        Return the human readable identifier of the target: its tag when
        one is set, its label otherwise.
        """
        return self.label if self.tag is None else self.tag

    def longtext(self):
        """
        Return the target description: its label followed by its device
        between parentheses.
        """
        fields = (self.label, self.dev)
        return "%s (%s)" % fields

    def get_nids(self):
        """
        Return a list with the `nids` attribute of each server, in
        allservers() order.
        """
        nids = []
        for server in self.allservers():
            nids.append(server.nids)
        return nids

    def text_status(self):
        """
        Return the human readable form of the target state.

        A "*" is appended when the global state is not an error but at
        least one node reports RUNTIME_ERROR or TARGET_ERROR. The recovery
        information is appended while the target is RECOVERING.
        """
        text = Component.text_status(self)

        error_states = set((RUNTIME_ERROR, TARGET_ERROR))
        node_errors = error_states & set(self._states.values())
        if node_errors and self.state not in (TARGET_ERROR, RUNTIME_ERROR):
            text += "*"

        if self.state == RECOVERING:
            text += " for %s" % self.recov_info
        return text

    #
    # Target sanity checks
    #

    def full_check(self, mountdata=True):
        """
        Sanity checks for device files and Lustre status.
        If mountdata is set to False, target content will not be analyzed.

        Raise ComponentError, with the local state set to TARGET_ERROR, when
        a disk level check fails. lustre_check() is then run.
        """

        # check for disk level status
        try:
            self._device_check()
            if mountdata:
                self._mountdata_check(self.label)

            if self.journal:
                self.journal.full_check()

        # 'except ... as' is valid on Python 2.6+ and Python 3, while the
        # comma form is a syntax error on Python 3.
        except (ComponentError, DiskDeviceError) as error:
            self.local_state = TARGET_ERROR
            raise ComponentError(self, str(error))

        # check for Lustre level status
        self.lustre_check()
Пример #13
0
 def allservers(self):
     """
     Return the group of servers this component can run on: a group
     holding only its current server.
     """
     only_server = [self.server]
     return ServerGroup(only_server)
Пример #14
0
 def testNodeSet(self):
     """ServerGroup.nodeset() of foo1 and foo2 equals NodeSet foo[1-2]"""
     first = Server("foo1", ["foo1@tcp"])
     second = Server("foo2", ["foo2@tcp"])
     group = ServerGroup([first, second])
     expected = NodeSet("foo[1-2]")
     self.assertEqual(group.nodeset(), expected)
Пример #15
0
class Target(Component, Disk):

    #
    # Text form for the different target states.
    #
    # Could be nearly merged with Target state_text_map if MOUNTED value
    # becomes the same.
    STATE_TEXT_MAP = {
        None: "unknown",
        EXTERNAL: "external",
        RECOVERING: "recovering",
        OFFLINE: "offline",
        TARGET_ERROR: "ERROR",
        MOUNTED: "online",
        RUNTIME_ERROR: "CHECK FAILURE",
        INACTIVE: "inactive",
        MIGRATED: "migrated"
    }

    def __init__(self, fs, server, index, dev, jdev=None, group=None,
                 tag=None, enabled=True, mode='managed', network=None,
                 active='yes'):
        """
        Build a Lustre target for filesystem `fs`, hosted on `server`.

        `index` is mandatory and stored as an integer. `dev` is handed to
        Disk.__init__() and `jdev`, when set, is wrapped in a Journal.
        `enabled`, `mode` and `active` are forwarded to Component.__init__();
        `group`, `tag` and `network` are stored unchanged.
        """
        Disk.__init__(self, dev)
        # Per-node state table, created before Component.__init__() is run
        # (presumably because that initializer assigns a state -- confirm).
        self._states = {}
        Component.__init__(self, fs, server, enabled, mode, active)

        # Default server the target runs on, and its failover servers
        self.defaultserver = server
        self.failservers = ServerGroup()

        assert index is not None
        self.index = int(index)
        self.group, self.tag, self.network = group, tag, network
        self.mntdev = self.dev
        self.recov_info = None

        # Optional journal device
        self.journal = Journal(self, jdev) if jdev else None

        # Mode and activity may force the initial state. The inactive check
        # comes last, so INACTIVE is the value assigned last when both hold.
        if self.is_external():
            self.state = EXTERNAL
        if not self.is_active():
            self.state = INACTIVE

    @property
    def label(self):
        """Target label: <fs name>-<TYPE in upper case><index as 4 hex digits>."""
        fields = (self.fs.fs_name, self.TYPE.upper(), self.index)
        return "%s-%s%04x" % fields

    def __lt__(self, other):
        """Order targets by comparing their START_ORDER attributes."""
        mine, theirs = self.START_ORDER, other.START_ORDER
        return mine < theirs

    def uniqueid(self):
        """
        Return the string identifying this target: its label.
        """
        # The identifier is needed when the target is added to a filesystem.
        # It cannot be derived from the target servers list, because that
        # list may change when add_server() is called.
        identifier = self.label
        return identifier

    def get_state(self):
        """
        Compute target global state based on remote nodes results.

        Per-node states are grouped by value and reduced to a single state,
        by decreasing priority:
          - None when every recorded node state is None;
          - INACTIVE when at least one node reports it;
          - when a node is MOUNTED: TARGET_ERROR if several nodes are
            MOUNTED or another one is RECOVERING, MOUNTED if the node is the
            default server, MIGRATED otherwise;
          - when a node is RECOVERING: TARGET_ERROR if several nodes are,
            RECOVERING otherwise;
          - OFFLINE, then TARGET_ERROR, then RUNTIME_ERROR;
          - None when nothing above matches.
        """
        # Group node names by state. A plain dictionary is used instead of
        # sorted()/groupby(): states mix None with state constants, and
        # sorting such values raises TypeError on Python 3.
        sdict = {}
        for node, state in self._states.items():
            sdict.setdefault(state, []).append(node)

        if None in sdict and len(sdict[None]) == len(self._states):
            return None

        if INACTIVE in sdict:
            return INACTIVE

        if MOUNTED in sdict:
            if len(sdict[MOUNTED]) > 1 or RECOVERING in sdict:
                return TARGET_ERROR
            if str(self.defaultserver.hostname) in sdict[MOUNTED]:
                return MOUNTED
            return MIGRATED

        if RECOVERING in sdict:
            # MOUNTED cannot be present here, it is handled just above.
            if len(sdict[RECOVERING]) > 1:
                return TARGET_ERROR
            return RECOVERING

        for state in (OFFLINE, TARGET_ERROR, RUNTIME_ERROR):
            if state in sdict:
                return state

        # NOTE(review): other states (e.g. EXTERNAL alone) end up here and
        # yield None, as before -- confirm this is intended.
        return None

    def set_state(self, value):
        """Record `value` as the target state for its current server."""
        hostname = str(self.server.hostname)
        self._states[hostname] = value

    state = property(get_state, set_state)

    def get_local_state(self):
        """
        Return the target state recorded for the local server, or None when
        the filesystem has no local server.
        """
        local_srv = self.fs.local_server
        if local_srv is not None:
            return self._states[str(local_srv.hostname)]
        return None

    def set_local_state(self, value):
        """
        Record `value` as the target state for the local server. Nothing is
        stored when the filesystem has no local server.
        """
        local_srv = self.fs.local_server
        if local_srv is None:
            return
        self._states[str(local_srv.hostname)] = value

    local_state = property(get_local_state, set_local_state)

    def sanitize_state(self, nodes=None):
        """
        Clean component state if it is wrong.

        For each node name in `nodes`, a state still unknown (None) is
        replaced by RUNTIME_ERROR. When `nodes` is None (the default),
        nothing is changed. A node name with no recorded state raises
        KeyError.
        """
        if nodes is None:
            # Iterating over the None default would raise TypeError.
            return

        for nodename in nodes:
            if self._states[nodename] is None:
                self._states[nodename] = RUNTIME_ERROR

    def update(self, other):
        """
        Refresh this target from `other`, the same target as seen by a
        distant node: disk fields, the state reported for that node's
        server, recovery information and index.
        """
        Disk.update(self, other)
        # We used to call Component.update(). Be careful if it is updated.
        remote = str(other.server.hostname)
        remote_state = other._states[remote]
        self._states[remote] = remote_state
        if remote_state == RECOVERING:
            # Compat v0.910: 'recov_info' value depends on remote version
            fallback = getattr(other, 'status_info', None)
            self.recov_info = getattr(other, 'recov_info', fallback)
        self.index = other.index

        # A pre shine 1.5 object carries the '_compat' flag: report it.
        if getattr(other, '_compat', False) is True:
            msg = ("WARNING: shine version mismatch !!!\n"
                   "\tPartial results may show up.\n"
                   "\tMigrated targets may not be detected.\n"
                   "\tTo avoid this, please synchronize shine versions.")
            self.fs._handle_shine_proxy_error(remote, msg)

    def add_server(self, server):
        """
        Register `server` as a failover server; its target state starts as
        unknown (None).
        """
        assert isinstance(server, Server)
        self.failservers.append(server)
        hostname = str(server.hostname)
        self._states[hostname] = None

    def update_server(self):
        """
        Pick the component's server from the states reported by the nodes.

        A started target (MOUNTED or RECOVERING) runs on the server which
        reports it. When not started, the server is the only one with a
        known state, if there is exactly one.

        Return False if the target is started on several servers, True
        otherwise.
        """
        started = [name for name, state in self._states.items()
                   if state in (MOUNTED, RECOVERING)]
        if len(started) > 1:
            return False

        if started:
            candidates = started
        else:
            # Not started anywhere: consider servers with a known state
            candidates = [name for name, state in self._states.items()
                          if state is not None]
            # Maybe we should do something if len(candidates) > 1?

        if len(candidates) == 1:
            nodes = NodeSet(candidates[0])
            self.server = self.allservers().select(nodes)[0]

        return True

    def allservers(self):
        """
        Build the group of every server this target can run on.

        The default server comes first, followed by the failover servers.
        """
        #XXX: This method could be possibly dropped if the code in Status
        #     command is optimized.
        servers = ServerGroup([self.defaultserver])
        for failsrv in self.failservers:
            servers.append(failsrv)
        return servers

    def failover(self, candidates):
        """
        Switch the target current server to the failover server designated
        by `candidates`.

        The failover servers are filtered with `candidates`. Exactly one
        match becomes the current server and True is returned. No match
        returns False. Several matches raise ComponentError.
        """
        matches = self.failservers.select(candidates)
        count = len(matches)

        # Several possible failover nodes: the request is ambiguous
        if count > 1:
            raise ComponentError(self,
                                 "More than one failover server matches.")

        if count != 1:
            return False

        self.server = matches[0]
        return True

    def get_id(self):
        """
        Return the human readable identifier of the target: its tag when
        one is set, its label otherwise.
        """
        return self.label if self.tag is None else self.tag

    def longtext(self):
        """
        Return the target description: its label followed by its device
        between parentheses.
        """
        fields = (self.label, self.dev)
        return "%s (%s)" % fields

    def get_nids(self):
        """
        Return a list with the `nids` attribute of each server, in
        allservers() order.
        """
        nids = []
        for server in self.allservers():
            nids.append(server.nids)
        return nids

    def text_status(self):
        """
        Return the human readable form of the target state.

        A "*" is appended when the global state is not an error but at
        least one node reports RUNTIME_ERROR or TARGET_ERROR. The recovery
        information is appended while the target is RECOVERING.
        """
        text = Component.text_status(self)

        error_states = set((RUNTIME_ERROR, TARGET_ERROR))
        node_errors = error_states & set(self._states.values())
        if node_errors and self.state not in (TARGET_ERROR, RUNTIME_ERROR):
            text += "*"

        if self.state == RECOVERING:
            text += " for %s" % self.recov_info
        return text

    #
    # Target sanity checks
    #

    def full_check(self, mountdata=True):
        """
        Run the disk level checks, then the Lustre level one.

        The device is always checked; the target content only when
        `mountdata` is true; the journal only when there is one. A failure
        of any of them sets the local state to TARGET_ERROR and is raised
        again as a ComponentError bound to this target.
        """
        journal = self.journal

        # Disk level checks
        try:
            self._device_check()
            if mountdata:
                self._mountdata_check(self.label)
            if journal:
                journal.full_check()
        except (ComponentError, DiskDeviceError) as exc:
            self.local_state = TARGET_ERROR
            raise ComponentError(self, str(exc))

        # Lustre level check
        self.lustre_check()

    def lustre_check(self):
        """
        Check target health at Lustre level.

        Set `local_state` from the Lustre entries found under /sys and
        /proc and from /proc/mounts: OFFLINE when no entry exists for the
        target label, MOUNTED when its device is mounted as lustre, or
        RECOVERING (with `recov_info` filled) when the recovery status file
        says so. `mntdev` is updated with the device read from the Lustre
        'mntdev' entry.

        Raise ComponentError, after setting `local_state` to TARGET_ERROR,
        when these sources are inconsistent or when the recovery status
        file cannot be opened.
        """

        self.local_state = None  # Unknown

        # find lustre parameters in procfs or sysfs
        # (Since Lustre 2.4, more than one path could be returned.
        #  The first one is fine. Since 2.13 it will be in sysfs.)
        mntdev_path = glob('/sys/fs/lustre/*/%s/mntdev' % self.label)
        if not mntdev_path:
            mntdev_path = glob('/proc/fs/lustre/*/%s/mntdev' % self.label)

        recov_path = glob('/proc/fs/lustre/*/%s/recovery_status' % self.label)
        assert len(recov_path) <= 1

        # check for label presence in /proc : is this lustre target started?
        if not mntdev_path:
            if not recov_path:
                self.local_state = OFFLINE
                return
            self.local_state = TARGET_ERROR
            raise ComponentError(self, "incoherent state in " \
                                       "/proc/fs/lustre for %s" % self.label)

        # get target's real device
        with open(mntdev_path[0]) as fproc:
            self.mntdev = fproc.readline().rstrip('\n')

        # NOTE(review): 'loaded' is never set to False, so the "multiple
        # mounts" and "mounted but not started" errors below cannot be
        # raised. Logic kept unchanged; confirm the intended detection.
        loaded = True

        # check for presence in /proc/mounts
        mount_prefix = "%s " % self.mntdev
        with open("/proc/mounts", 'r') as f_proc_mounts:
            for line in f_proc_mounts:
                if not line.startswith(mount_prefix):
                    continue
                # Third field is the filesystem type
                if line.split(' ', 3)[2] != "lustre":
                    continue
                if loaded:
                    self.local_state = MOUNTED
                else:
                    self.local_state = TARGET_ERROR
                    raise ComponentError(self, "multiple " \
                                         " mounts detected for %s" % self.label)

        if self.local_state != MOUNTED and loaded:
            self.local_state = TARGET_ERROR
            # up but not mounted = incoherent state
            # check for loaded state: ST, UP...
            raise ComponentError(self, "incoherent state for %s " \
                                 "(started but not mounted?)" % self.label)

        if self.local_state == MOUNTED and not loaded:
            self.local_state = TARGET_ERROR
            # mounted but not up = incoherent state
            # /etc/fstab was not correctly cleaned
            raise ComponentError(self, "incoherent state for %s " \
                                 "(mounted but not started?)" % self.label)

        # check for MDT or OST recovery (MGS doesn't make any recovery)
        if self.local_state != MOUNTED or self.TYPE == MGT.TYPE:
            return

        # Only the open() is guarded: read errors are not converted.
        try:
            fproc = open(recov_path[0], 'r')
        except (IOError, IndexError):
            self.local_state = TARGET_ERROR
            raise ComponentError(self, "recovery_state file not " \
                                          "found for %s" % self.label)

        with fproc:
            # Stays None when the file has no "status:" line, in which case
            # the target is simply left as MOUNTED.
            status = None
            for line in fproc:
                if line.startswith("status:"):
                    status = line.rstrip().split(' ', 2)[1]
                    break

            #
            # Recovering information depends on Lustre version.
            #
            # VERSION:               2.0            1.8                     1.6
            #
            # connected_clients: connect/TOTAL  connect/TOTAL            connect/TOTAL
            # req_replay:        req_replay     ---                      ---
            # lock_repay:        lock_replay    ---                      ---
            # delayed_client:    ---            delay/TOTAL              ---
            # completed_clients: connect-replay TOTAL-recov-delay/TOTAL  TOTAL-recov/TOTAL
            # evicted_clients:   stale          ---                      ---
            #
            if status == "RECOVERING":
                time_remaining = "??"
                completed = -1
                evicted = 0
                total = 0
                # Remaining lines follow the "status:" one
                for line in fproc:
                    line = line.strip()
                    if line.startswith("time_remaining:"):
                        time_remaining = line.split(' ', 1)[1]
                    elif line.startswith("connected_clients:"):
                        total = int(line.split('/', 1)[1])
                    elif line.startswith("evicted_clients:"):
                        evicted = int(line.split(' ', 1)[1])
                    elif line.startswith("completed_clients:"):
                        completed = line.split(' ', 1)[1]
                        completed = int(completed.split('/', 1)[0])
                self.local_state = RECOVERING
                self.recov_info = "%ss (%s/%s)" % (
                    time_remaining, completed + evicted, total)

    #
    # Helper methods to check component state in Actions.
    #

    def is_started(self):
        """Return True if the local state is MOUNTED or RECOVERING."""
        current = self.local_state
        return current in (MOUNTED, RECOVERING)

    def raise_if_started(self, message):
        """
        Raise a ComponentError unless the local state is OFFLINE.

        The error text, prefixed by `message`, says "started" when the
        target is started and "busy" otherwise. The local state is set to
        TARGET_ERROR before raising.
        """
        if self.local_state == OFFLINE:
            return

        reason = ("%s: target %s (%s) is started" if self.is_started()
                  else "%s: target %s (%s) is busy")
        self.local_state = TARGET_ERROR
        raise ComponentError(self, reason % (message, self.label, self.dev))

    #
    # Target actions
    #

    def format(self, **kwargs):
        """
        Return the Format action for this target, built with `kwargs`.

        When the target has a journal, the action is made dependent on a
        JournalFormat action built with the same `kwargs`.
        """
        fmt_action = Format(self, **kwargs)
        journal = self.journal
        if journal:
            fmt_action.depends_on(JournalFormat(journal, **kwargs))
        return fmt_action

    def tunefs(self, **kwargs):
        """
        Return the Tunefs action for this target, built with `kwargs`. It
        applies on-disk metadata from the Target description, using the
        tunefs.lustre command.
        """
        action = Tunefs(self, **kwargs)
        return action

    def fsck(self, **kwargs):
        """
        Return the Fsck action for this target, built with `kwargs`. It
        checks the coherency of this Target only, not the coherency between
        several targets.
        """
        action = Fsck(self, **kwargs)
        return action

    def start(self, **kwargs):
        """Return the StartTarget action for this target, built with `kwargs`."""
        action = StartTarget(self, **kwargs)
        return action

    def stop(self, **kwargs):
        """Return the StopTarget action for this target, built with `kwargs`."""
        action = StopTarget(self, **kwargs)
        return action

    def __setstate__(self, state):
        """
        Restore a pickled Target, keeping compatibility with older servers.

        Before shine 1.5, Target objects carried an inherited 'state'
        attribute and no '_states' dictionary. Beginning with shine 1.5,
        they have a '_states' dictionary and a 'state' property (which
        forbids access to the inherited 'state' attribute).
        """
        self.__dict__.update(state)
        if hasattr(self, '_states'):
            return

        # Remote object is a pre shine 1.5 object: build the new style
        # '_states' dictionary from its single pickled 'state' value.
        self._states = {str(self.server.hostname): state['state']}

        # Flag the object so that the user can be notified later.
        self._compat = True
Пример #16
0
class Target(Component, Disk):

    #
    # Text form for the different target states, keyed by state constant
    # (the None key maps to "unknown").
    #
    # NOTE(review): this map could nearly be merged with the corresponding
    # client state text map if the MOUNTED value becomes the same
    # (presumably what the original note meant -- confirm).
    STATE_TEXT_MAP = { 
        None:          "unknown",
        EXTERNAL:      "external", 
        RECOVERING:    "recovering", 
        OFFLINE:       "offline", 
        TARGET_ERROR:  "ERROR", 
        MOUNTED:       "online", 
        RUNTIME_ERROR: "CHECK FAILURE" 
    }

    def __init__(self, fs, server, index, dev, jdev=None, group=None,
            tag=None, enabled=True, mode='managed', network=None):
        """
        Set up a Lustre target object.

        `dev` is handed to the Disk initializer; `fs`, `server`, `enabled`
        and `mode` are handed to the Component initializer. `index` must not
        be None and is stored as an integer. When `jdev` is set, a Journal
        object is attached to the target.
        """
        Disk.__init__(self, dev)
        Component.__init__(self, fs, server, enabled, mode)

        # Default server the target runs on, and all its failover servers
        # (none at creation time).
        self.defaultserver = server
        self.failservers = ServerGroup()

        assert index is not None
        self.index = int(index)

        self.group, self.tag, self.network = group, tag, network
        self.mntdev = self.dev
        self.recov_info = None

        # Optional external journal device
        journal = None
        if jdev:
            journal = Journal(self, jdev)
        self.journal = journal

        # An external target gets the matching state right away
        if self.is_external():
            self.state = EXTERNAL

    @property
    def label(self):
        """Return the target label which match the Lustre target name."""
        return "%s-%s%04x" % (self.fs.fs_name, self.TYPE.upper(), self.index)

    def __lt__(self, other):
        return self.START_ORDER < other.START_ORDER

    def uniqueid(self):
        """
        Return a unique string representing this target.

        This matches the Target label.
        """
        # uniqueid is used when the target is added to a filesystem.
        # We cannot use the target servers list because it can changed when
        # add_server() is called.
        return self.label

    def update(self, other):
        """
        Refresh my serializable fields from `other`, the distant object.

        Runs the Disk and Component updates, then copies the index and the
        recovery information.
        """
        Disk.update(self, other)
        Component.update(self, other)
        self.index = other.index

        # Compat v0.910: 'recov_info' value depends on remote version.
        # Fall back on 'status_info', then on None, when it is missing.
        legacy_info = getattr(other, 'status_info', None)
        self.recov_info = getattr(other, 'recov_info', legacy_info)

    def add_server(self, server):
        """
        Append `server`, which must be a Server instance, to the failover
        servers of this target.
        """
        assert isinstance(server, Server)
        failservers = self.failservers
        failservers.append(server)

    def allservers(self):
        """
        Return a new ServerGroup with every server this target can run on.

        The default server comes first, followed by all the failover
        servers.
        """
        #XXX: This method could be possibly dropped if the code in Status
        #     command is optimized.
        servers = ServerGroup([self.defaultserver])
        for failsrv in self.failservers:
            servers.append(failsrv)
        return servers

    def failover(self, candidates):
        """
        Helper method to change Target current server based on a candidate list.

        It checks if only one server from the candidate list matches one of the
        failover server of this target. If more than one matches, it
        raises an exception. If no server matches it returns False. If it has
        changes the current server, it returns true.
        """
        intersec = self.failservers.select(candidates)

        # If we have more than one possible failover nodes, it is ambiguous
        if len(intersec) > 1:
            raise ComponentError(self, "More than one failover server matches.")

        if len(intersec) == 1:
            self.server = intersec[0]
            return True

        return False


    def get_id(self):
        """
        Get target human readable identifier.
        """
        if self.tag is not None:
            return self.tag

        return self.label
    
    def longtext(self):
        """
        Return the target name and device
        """
        return "%s (%s)" % (self.label, self.dev)

    def get_nids(self):
        """
        Return an ordered list of the target's NIDs.

        There is one element per server from allservers(), in the same
        order; each element is the `nids` attribute of that server.
        """
        nids = []
        for server in self.allservers():
            nids.append(server.nids)
        return nids

    def text_status(self):
        """
        Return a human text form for the target state.

        A recovering target gets its recovery information appended; any
        other state is handled by Component.text_status().
        """
        if self.state == RECOVERING:
            recovering_text = self.STATE_TEXT_MAP.get(RECOVERING)
            return "%s for %s" % (recovering_text, self.recov_info)

        return Component.text_status(self)

    #
    # Target sanity checks
    #

    def full_check(self, mountdata=True):
        """
        Sanity checks for device files and Lustre status.
        If mountdata is set to False, target content will not be analyzed.

        Raises ComponentError if one of the disk level checks fails; in
        that case the target state is set to TARGET_ERROR first.
        """

        # check for disk level status
        try:
            self._device_check()
            if mountdata:
                # Mount data is checked against the target label
                self._mountdata_check(self.label)

            # The journal device, if any, is checked too
            if self.journal:
                self.journal.full_check()

        # NOTE: 'except X, name' is Python 2 only syntax.
        except (ComponentError, DiskDeviceError), error:
            # Flag the target as faulty and report the failure as a
            # ComponentError bound to this target, keeping only the
            # message of the original error.
            self.state = TARGET_ERROR
            raise ComponentError(self, str(error))

        # check for Lustre level status
        self.lustre_check()