Пример #1
0
 def hb_fo(self):
     """
     Open self.dev with self.flags and yield a read/write binary file object.

     This is a generator with a single yield.
     NOTE(review): it is presumably wrapped by a context manager decorator
     declared outside this view -- confirm.

     Raises ex.excAbortAction if the device can not be opened. An EINVAL
     error is reported as directio not being supported.

     Exceptions raised by the consumer while the file object is yielded
     are logged and not propagated. On exit, buffered data is flushed and
     fsync'ed, then the file object is closed.
     """
     try:
         fd = os.open(self.dev, self.flags)
         fo = os.fdopen(fd, 'rb+')
     except OSError as exc:
         if exc.errno == errno.EINVAL:
             raise ex.excAbortAction("%s directio is not supported" %
                                     self.dev)
         else:
             raise ex.excAbortAction("error opening %s: %s" %
                                     (self.dev, str(exc)))
     except Exception as exc:
         raise ex.excAbortAction("error opening %s: %s" %
                                 (self.dev, str(exc)))
     try:
         yield fo
     except Exception as exc:
         self.log.error("%s: %s", self.dev, exc)
     finally:
         try:
             # fsync only covers data already handed to the kernel, so
             # flush the python-level buffer first.
             fo.flush()
             os.fsync(fd)
         except (OSError, IOError, ValueError) as exc:
             # ValueError: the consumer already closed the file object
             self.duplog("error", "%(exc)s", exc=str(exc), nodename="")
         finally:
             # closing fo also closes fd
             fo.close()
Пример #2
0
 def get_cmd(self, action, script_arg=None, validate=True):
     """
     Build the command to run for <action>, from the "<action>_seq"
     attribute value.

     * If the value is None or compares equal to False, raise
       ex.excAbortAction.
     * If the value converts to an integer, or convert_boolean() accepts
       it and does not return False, the command is
       [self.script, script_arg or action].
     * Otherwise the value is split as a shell-like command line. If the
       words contain "|", "||", "&&" or a word ending with ";", the raw
       value is returned as-is, without validation.

     When <validate> is true, the command list is passed through
     self.validate_on_action() before being returned.
     """
     key = action + "_seq"
     val = getattr(self, key)
     if val in (None, False):
         raise ex.excAbortAction()
     try:
         int(val)
         cmd = [self.script, script_arg if script_arg else action]
     except (TypeError, ValueError):
         try:
             val = convert_boolean(val)
             if val is False:
                 raise ex.excAbortAction()
             cmd = [self.script, script_arg if script_arg else action]
         except ex.excAbortAction:
             raise
         except Exception:
             # Not a boolean-like value: treat it as a command line.
             # Catching Exception (instead of a bare except) lets
             # KeyboardInterrupt and SystemExit propagate.
             if six.PY2:
                 cmd = map(lambda s: s.decode('utf8'),
                           shlex.split(val.encode('utf8')))
             else:
                 cmd = shlex.split(val)
             if "|" in cmd or "||" in cmd or "&&" in cmd or any(
                     w.endswith(";") for w in cmd):
                 # shell constructs: hand the raw string back to the caller
                 return val
     if validate:
         cmd = self.validate_on_action(cmd)
     return cmd
Пример #3
0
 def validate_cluster_global_expect(self, global_expect):
     """
     Raise ex.excAbortAction when <global_expect> is "thawed" or "frozen"
     and the "frozen" value of the "monitor" section of
     shared.DAEMON_STATUS already holds that same state.

     Any other <global_expect>, including None, is accepted silently.
     """
     if global_expect is None:
         return
     already = (
         ("thawed", "cluster is already thawed"),
         ("frozen", "cluster is already frozen"),
     )
     for state, message in already:
         if global_expect != state:
             continue
         current = shared.DAEMON_STATUS.get("monitor", {}).get("frozen")
         if current == state:
             raise ex.excAbortAction(message)
Пример #4
0
 def _configure(self):
     """
     (Re)load this heartbeat's settings from the node configuration.

     Does nothing if the section named self.name is no longer present in
     shared.NODE.cd. Otherwise refreshes the heartbeat nodes, resets
     self.peer_config and reads the "timeout", "relay" and "secret"
     options.

     Raises ex.excAbortAction if reading "relay" or "secret" fails.
     """
     if self.name not in shared.NODE.cd:
         # this thread will be stopped. don't reconfigure to avoid logging errors
         return
     self.get_hb_nodes()
     self.peer_config = {}
     self.timeout = shared.NODE.oget(self.name, "timeout")
     try:
         self.relay = shared.NODE.oget(self.name, "relay")
     except Exception:
         raise ex.excAbortAction("%s.relay is not set in node.conf" % self.name)
     try:
         self.secret = shared.NODE.oget(self.name, "secret")
     except Exception:
         raise ex.excAbortAction("%s.secret is not set in node.conf" % self.name)
Пример #5
0
    def validate_global_expect(self, path, global_expect, thr=None):
        """
        Check that <global_expect> can be set as the target state of the
        object <path>.

        Returns None when the target is acceptable.

        Raises ex.excError if
        * no instance of <path> is reported by <thr>
        * an instance not already targeting <global_expect> has a monitor
          status that is not "idle" and contains neither "failed" nor
          "wait" (a "tocing" status is tolerated for the "placed" target)

        Raises ex.excAbortAction if
        * every instance already targets <global_expect>
        * the target is "started" or "stopped" and the aggregated avail
          status already matches, or is "n/a" or "undef"
        """
        if global_expect is None:
            return
        if global_expect in ("frozen", "aborted", "provisioned"):
            # allow provision target state on just-created service
            return

        # give the object up to 5 tries, one second apart, to appear
        for _attempt in range(5):
            instances = thr.get_service_instances(path)
            if instances or not is_service(path):
                break
            time.sleep(1)
        if not instances:
            raise ex.excError("object does not exist")

        targets = set()
        for node, instance in instances.items():
            monitor = instance.get("monitor", {})
            target = monitor.get("global_expect")
            targets.add(target)
            if global_expect == target:
                continue
            state = monitor.get("status", "unknown")
            if state == "tocing" and global_expect == "placed":
                # Allow the "toc" action with the "switch" monitor_action
                # to change status from "tocing" to "start failed".
                continue
            if state == "idle" or "failed" in state or "wait" in state:
                continue
            raise ex.excError("%s instance on node %s in %s state"
                              % (path, node, state))

        if targets == {global_expect}:
            raise ex.excAbortAction("%s is already targeting %s" %
                                    (path, global_expect))

        if global_expect not in ("started", "stopped"):
            return

        # only the started/stopped targets are checked against the
        # aggregated avail status
        avail = Storage(shared.AGG.get(path, {})).avail
        if global_expect == "started":
            if avail == "up":
                raise ex.excAbortAction("%s is already started" % path)
        elif avail in ("down", "stdby down", "stdby up"):
            raise ex.excAbortAction("%s is already stopped" % path)
        if avail in ("n/a", "undef"):
            raise ex.excAbortAction()
Пример #6
0
    def _configure(self):
        """
        (Re)load this heartbeat's settings from the node configuration and
        (re)open its device.

        Does nothing if the section named self.name is no longer present
        in shared.NODE.cd.

        Raises ex.excAbortAction if
        * the "dev" option is not set
        * the device path does not exist
        * the device is not a block device on Linux, or not a char device
          on other systems
        """
        if self.name not in shared.NODE.cd:
            # this thread will be stopped. don't reconfigure to avoid logging errors
            return

        self.get_hb_nodes()
        self.peer_config = {}

        # allocate the anonymous buffers once, and reuse them across
        # reconfigurations
        if not hasattr(self, "meta_slot_buff"):
            self.meta_slot_buff = mmap.mmap(-1, 2 * mmap.PAGESIZE)
        if not hasattr(self, "slot_buff"):
            self.slot_buff = mmap.mmap(-1, self.SLOTSIZE)

        self.timeout = shared.NODE.oget(self.name, "timeout")
        try:
            new_dev = shared.NODE.oget(self.name, "dev")
        except ex.RequiredOptNotFound:
            raise ex.excAbortAction("%s.dev is not set in node.conf" %
                                    self.name)

        if not os.path.exists(new_dev):
            raise ex.excAbortAction("%s does not exist" % new_dev)

        # resolve symlinks so the device type check and the change
        # detection below apply to the real device path
        new_dev = os.path.realpath(new_dev)
        new_flags = os.O_RDWR
        statinfo = os.stat(new_dev)
        if rcEnv.sysname == "Linux":
            if stat.S_ISBLK(statinfo.st_mode):
                self.log.info("using directio")
                new_flags |= os.O_DIRECT | os.O_SYNC | os.O_DSYNC  # (Darwin, SunOS) pylint: disable=no-member
            else:
                raise ex.excAbortAction("%s must be a block device" % new_dev)
        else:
            if not stat.S_ISCHR(statinfo.st_mode):
                raise ex.excAbortAction("%s must be a char device" % new_dev)

        # the open flags are only replaced when the device path changes
        if new_dev != self.dev:
            self.dev = new_dev
            self.flags = new_flags
            self.peer_config = {}
            self.log.info("set dev=%s", self.dev)

        with self.hb_fo() as fo:
            self.load_peer_config(fo=fo)
Пример #7
0
 def write_slot(self, slot, data, fo=None):
     """
     Write <data> to the slot number <slot> of the open file object <fo>.

     <data> is copied at the beginning of the reusable self.slot_buff,
     then the whole buffer is written at the slot offset and flushed.
     Bytes of the buffer past len(data) are left as they were.

     Raises ex.excAbortAction, after logging an error, if <data> is
     longer than self.SLOTSIZE.
     """
     too_long = len(data) > self.SLOTSIZE
     if too_long:
         self.log.error("attempt to write too long data in slot %d", slot)
         raise ex.excAbortAction()

     # stage the payload at the head of the slot buffer
     buff = self.slot_buff
     buff.seek(0)
     buff.write(data)

     # then write the whole buffer at the slot position
     fo.seek(self.slot_offset(slot), os.SEEK_SET)
     fo.write(buff)
     fo.flush()
Пример #8
0
    def validate_destination_node(self, path, global_expect, thr=None):
        """
        For a placed@<dst> <global_expect> (move action) on <path>,

        Raise an excError if
        * the object <path> does not exist
        * the object <path> topology is failover and more than 1
          destination node was specified
        * the specified destination is not a object candidate node
        * no destination node specified
        * an empty destination node is specified in a list of destination
          nodes

        Raise an excAbortAction if
        * the avail status of the instance on the destination node is up

        Return "placed@<node>" when the destination is "<peer>", <node>
        being the first node of thr.placement_ranks() among the candidate
        nodes. Return None in all other accepted cases, including a
        <global_expect> that is not a placed@<dst> expression.
        """
        if global_expect is None:
            return
        try:
            global_expect, destination_nodes = global_expect.split("@", 1)
        except ValueError:
            # no "@" in the expression: not a move action
            return
        if global_expect != "placed":
            return
        instances = thr.get_service_instances(path)
        if not instances:
            raise ex.excError("object does not exist")
        if destination_nodes == "<peer>":
            instance = list(instances.values())[0]
            if instance.get("topology") == "flex":
                raise ex.excError("no destination node specified")
            # candidates: instances neither available nor started
            nodes = [node for node, inst in instances.items()
                     if inst.get("avail") not in ("up", "warn", "n/a") and
                     inst.get("monitor", {}).get("status") != "started"]
            if not nodes:
                raise ex.excError("no candidate destination node")
            svc = thr.get_service(path)
            return "placed@%s" % thr.placement_ranks(svc, nodes)[0]

        # str.split() never returns an empty list, so the empty
        # destination must be detected before splitting.
        if not destination_nodes:
            raise ex.excError("no destination node specified")
        destination_nodes = destination_nodes.split(",")
        instance = list(instances.values())[0]
        if len(destination_nodes) > 1 and \
           instance.get("topology") == "failover":
            raise ex.excError(
                "only one destination node can be specified for "
                "a failover service")
        for destination_node in destination_nodes:
            if not destination_node:
                raise ex.excError("empty destination node")
            if destination_node not in instances:
                raise ex.excError("destination node %s has no %s instance" %
                                  (destination_node, path))
            # an instance may not report an avail status
            if instances[destination_node].get("avail") == "up":
                raise ex.excAbortAction(
                    "instance on destination node %s is "
                    "already up" % destination_node)