Example #1
 def save(self, update=True, ttl=None):
     super(NodeContext, self).save(update)
     status = self.value + "/status"
     if ttl:
         self._ttl = ttl
         try:
             etcd_utils.refresh(status, ttl)
         except etcd.EtcdKeyNotFound:
             pass
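The pattern above recurs in most of the examples below: persist the object, then refresh a per-object status key so it expires after ttl seconds unless a later save renews it, and swallow EtcdKeyNotFound because the status key may not have been written yet. As a rough sketch only, assuming python-etcd's Client.refresh(key, ttl) and an injected reconnect callback (the project's real etcd_utils wrapper is not shown here), such a helper could look like:

import etcd


def refresh(key, ttl, client, reconnect):
    # Reset the TTL of an existing key without changing its value.
    # Retry once after reconnecting on connection failure; EtcdKeyNotFound
    # propagates so callers can decide whether to ignore it.
    try:
        return client.refresh(key, ttl=ttl)
    except etcd.EtcdConnectionFailed:
        reconnect()
        return client.refresh(key, ttl=ttl)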
Example #2
    def volume_remove_brick_force(self, event):
        time.sleep(self.sync_interval)
        # Event returns the bricks list as a single space-separated string
        bricks = event['message']['bricks'].split(" ")
        for brick in bricks:
            fetched_brick = NS.gluster.objects.Brick(
                fqdn=brick.split(":/")[0],
                brick_dir=brick.split(":/")[1].replace('/', '_')).load()

            try:
                NS._int.wclient.delete(
                    "clusters/{0}/Bricks/all/{1}/{2}".format(
                        NS.tendrl_context.integration_id,
                        brick.split(":/")[0],
                        brick.split(":/")[1].replace('/', '_')),
                    recursive=True,
                )
            except etcd.EtcdKeyNotFound:
                pass

            job_id = monitoring_utils.update_dashboard(
                "%s|%s" % (event['message']['volume'], brick),
                RESOURCE_TYPE_BRICK, NS.tendrl_context.integration_id,
                "delete")
            logger.log(
                "debug", NS.publisher_id,
                {"message": "Update dashboard job %s "
                 "created" % job_id})

            job_id = monitoring_utils.delete_resource_from_graphite(
                "%s|%s" % (event['message']['volume'], brick),
                RESOURCE_TYPE_BRICK, NS.tendrl_context.integration_id,
                "delete")
            logger.log(
                "debug", NS.publisher_id, {
                    "message":
                    "Delete resource from graphite job %s "
                    "created" % job_id
                })

        volume_brick_path = "clusters/{0}/Volumes/{1}/"\
                            "Bricks".format(
                                NS.tendrl_context.integration_id,
                                fetched_brick.vol_id,
                            )

        # remove all the brick information under the volume, as the
        # subvolume might have changed; let the next sync handle
        # updating the brick info
        try:
            NS._int.wclient.delete(volume_brick_path, recursive=True)
        except etcd.EtcdKeyNotFound:
            pass

        _trigger_sync_key = 'clusters/%s/_sync_now' % NS.tendrl_context.integration_id
        etcd_utils.write(_trigger_sync_key, 'true')
        etcd_utils.refresh(_trigger_sync_key, self.sync_interval)
Example #3
 def save(self, update=True, ttl=None):
     super(NodeContext, self).save(update)
     status = self.value + "/status"
     if ttl:
         self._ttl = ttl
         try:
             etcd_utils.refresh(status, ttl)
         except etcd.EtcdKeyNotFound:
             pass
Example #4
    def save(self, update=True, ttl=None):
        if not self.hash_compare_with_central_store():
            _volume = NS.gluster.objects.Volume(vol_id=self.vol_id)
            _volume.invalidate_hash()

        super(Brick, self).save(update)
        status = self.value + "/status"
        if ttl:
            etcd_utils.refresh(status, ttl)

        return
Example #5
 def push_operation(self):
     etcd_utils.write(
         "/messages/jobs/%s" % self.message.job_id,
         Message.to_json(self.message),
         append=True)
     etcd_utils.refresh(
         "/messages/jobs/%s" % self.message.job_id,
         ttl=NS.config.data['message_retention_time']
     )
     log_message = ("%s:%s") % (
         self.message.job_id,
         self.message.payload["message"])
     return log_message
Example #6
 def push_operation(self):
     etcd_utils.write(
         "/messages/jobs/%s" % self.message.job_id,
         Message.to_json(self.message),
         append=True)
     etcd_utils.refresh(
         "/messages/jobs/%s" % self.message.job_id,
         ttl=NS.config.data['message_retention_time']
     )
     log_message = ("%s:%s") % (
         self.message.job_id,
         self.message.payload["message"])
     return log_message
Example #7
    def save(self, update=True, ttl=None):
        if not self.hash_compare_with_central_store():
            _volume = NS.tendrl.objects.GlusterVolume(
                NS.tendrl_context.integration_id,
                vol_id=self.vol_id
            ).load()
            _volume.invalidate_hash()

        super(Brick, self).save(update)
        status = self.value + "/status"
        if ttl:
            etcd_utils.refresh(status, ttl)

        return
Example #8
    def save(self, update=True, ttl=None):
        hash_key_changed = True
        if "Message" not in self.__class__.__name__:
            # If the local object.hash equals the central_store
            # object.hash, the hash key is unchanged
            if self.hash_compare_with_central_store(ttl=ttl):
                # No change in hashkey
                hash_key_changed = False
        rendered_obj = self.render()
        watchables = self._defs.get("watch_attrs", [])
        if self.__class__.__name__ in ['Config', 'Definition'] or \
            len(watchables) > 0:
            for item in rendered_obj:
                if item['name'] in watchables:
                    _type = self._defs.get("attrs", {}).get(item['name'],
                                                            {}).get("type")
                    if _type and _type.lower() in ['json', 'list'] and \
                        item['value']:
                        try:
                            item['value'] = json.dumps(item['value'])
                        except ValueError:
                            _msg = "Error save() attr %s for object %s" % \
                                   (item['name'], self.__name__)
                            logger.log("debug", NS.publisher_id,
                                       {"message": _msg})
                    if self._ttl and item['name'] in self._attrs_with_ttl:
                        etcd_utils.write(item['key'],
                                         item['value'],
                                         quorum=True,
                                         ttl=self._ttl)
                    else:
                        etcd_utils.write(item['key'],
                                         item['value'],
                                         quorum=True)
        if hash_key_changed:
            data_key = self.value + '/data'
            etcd_utils.write(data_key, self.json)
            updated_at_key = self.value + '/updated_at'
            hash_key = self.value + '/hash'
            etcd_utils.write(updated_at_key, str(time_utils.now()))
            if hasattr(self, 'hash'):
                etcd_utils.write(hash_key, self.hash)

            if ttl:
                etcd_utils.refresh(self.value, ttl)

        self.watch_attrs()
Example #9
    def save(self, update=True, ttl=None):
        hash_key_changed = True
        if "Message" not in self.__class__.__name__:
            # If the local object.hash equals the central_store
            # object.hash, the hash key is unchanged
            if self.hash_compare_with_central_store(ttl=ttl):
                # No change in hashkey
                hash_key_changed = False
        rendered_obj = self.render()
        watchables = self._defs.get("watch_attrs", [])
        if self.__class__.__name__ in ['Config', 'Definition'] or \
            len(watchables) > 0:
            for item in rendered_obj:
                if item['name'] in watchables:
                    _type = self._defs.get("attrs", {}).get(
                        item['name'],
                        {}
                    ).get("type")
                    if _type and _type.lower() in ['json', 'list'] and \
                        item['value']:
                        try:
                            item['value'] = json.dumps(item['value'])
                        except ValueError:
                            _msg = "Error save() attr %s for object %s" % \
                                   (item['name'], self.__name__)
                            logger.log(
                                "debug",
                                NS.publisher_id,
                                {"message": _msg}
                            )
                    etcd_utils.write(item['key'], item['value'], quorum=True)
        if hash_key_changed:
            data_key = self.value + '/data'
            etcd_utils.write(data_key, self.json)
            updated_at_key = self.value + '/updated_at'
            hash_key = self.value + '/hash'
            etcd_utils.write(updated_at_key, str(time_utils.now()))
            if hasattr(self, 'hash'):
                etcd_utils.write(hash_key, self.hash)

            if ttl:
                etcd_utils.refresh(self.value, ttl)

        self.watch_attrs()
Example #10
 def hash_compare_with_central_store(self, ttl=None):
     try:
         # Generate current in memory object hash
         self.hash = self._hash()
         _hash_key = "/{0}/hash".format(self.value)
         _stored_hash = None
         try:
             _stored_hash = etcd_utils.read(_hash_key).value
         except etcd.EtcdKeyNotFound:
             return False
         if self.hash == _stored_hash:
             # No changes in stored object and current object,
             # don't save current object to central store
             if ttl:
                 etcd_utils.refresh(self.value, ttl)
             return True
         else:
             return False
     except TypeError:
         # no hash for this object, save the current hash as is
         return False
Example #11
 def hash_compare_with_central_store(self, ttl=None):
     try:
         # Generate current in memory object hash
         self.hash = self._hash()
         _hash_key = "/{0}/hash".format(self.value)
         _stored_hash = None
         try:
             _stored_hash = etcd_utils.read(_hash_key).value
         except etcd.EtcdKeyNotFound:
             return False
         if self.hash == _stored_hash:
             # No changes in stored object and current object,
             # don't save current object to central store
             if ttl:
                 etcd_utils.refresh(self.value, ttl)
             return True
         else:
             return False
     except TypeError:
         # no hash for this object, save the current hash as is
         return False
Example #12
 def hash_compare_with_central_store(self, ttl=None):
     self.render()
     try:
         # Generate current in memory object hash
         self._hash()
         _hash_key = "/{0}/hash".format(self.value)
         _stored_hash = None
         try:
             _stored_hash = NS._int.client.read(_hash_key).value
         except (etcd.EtcdConnectionFailed, etcd.EtcdException) as ex:
             if type(ex) != etcd.EtcdKeyNotFound:
                 NS._int.reconnect()
                 _stored_hash = NS._int.client.read(_hash_key).value
         if self.hash == _stored_hash:
             # No changes in stored object and current object,
             # don't save current object to central store
             if ttl:
                 etcd_utils.refresh(self.value, ttl)
             return True
         else:
             return False
     except TypeError:
         # no hash for this object, save the current hash as is
         return False
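Examples #10 through #12 thread the ttl argument into hash_compare_with_central_store so that, when the stored hash matches and the write is skipped, the key's TTL is still renewed and the object does not silently expire. A hypothetical, stripped-down caller illustrating that contract (the real save() implementations appear in Examples #8, #9, #15 and #16; the import path is an assumption):

from tendrl.commons.utils import etcd_utils  # assumed module path


def save_if_changed(obj, ttl=None):
    # Skip the write when the stored hash matches; in that case the
    # compare call has already refreshed the key's TTL (see Example #10).
    if obj.hash_compare_with_central_store(ttl=ttl):
        return
    etcd_utils.write(obj.value + '/data', obj.json)
    if ttl:
        etcd_utils.refresh(obj.value, ttl)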
Example #13
def test_refresh():
    setattr(__builtin__, "NS", maps.NamedDict())
    setattr(NS, "_int", maps.NamedDict())
    NS._int.wclient = importlib.import_module("tendrl.commons"
                                              ".tests.fixtures."
                                              "client").Client()
    NS._int.wreconnect = type("Dummy", (object, ), {})
    with patch.object(Client, "refresh") as mock_refresh:
        etcd_utils.refresh("test_value", 1)
        assert mock_refresh.called
    with patch.object(Client, "refresh",
                      raise_etcdconnectionfailed) as mock_refresh:
        with pytest.raises(etcd.EtcdConnectionFailed):
            etcd_utils.refresh("test_value", 1)
    with patch.object(Client, "refresh",
                      raise_etcdkeynotfound) as mock_refresh:
        with pytest.raises(etcd.EtcdKeyNotFound):
            etcd_utils.refresh("test_value", 1)
Example #14
def test_refresh():
    setattr(__builtin__, "NS", maps.NamedDict())
    setattr(NS, "_int", maps.NamedDict())
    NS._int.wclient = importlib.import_module("tendrl.commons"
                                              ".tests.fixtures."
                                              "client").Client()
    NS._int.wreconnect = type("Dummy", (object,), {})
    with patch.object(Client, "refresh") as mock_refresh:
        etcd_utils.refresh("test_value", 1)
        assert mock_refresh.called
    with patch.object(Client, "refresh",
                      raise_etcdconnectionfailed) as mock_refresh:
        with pytest.raises(etcd.EtcdConnectionFailed):
            etcd_utils.refresh("test_value", 1)
    with patch.object(Client, "refresh",
                      raise_etcdkeynotfound) as mock_refresh:
        with pytest.raises(etcd.EtcdKeyNotFound):
            etcd_utils.refresh("test_value", 1)
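The second and third with-blocks above patch Client.refresh with side-effect callables named raise_etcdconnectionfailed and raise_etcdkeynotfound; those fixtures are not shown in the snippet, but hypothetical stand-ins would simply raise the corresponding python-etcd exceptions:

import etcd


def raise_etcdconnectionfailed(*args, **kwargs):
    # Illustrative stub: every call fails with a connection error, so the
    # wrapper under test is expected to re-raise etcd.EtcdConnectionFailed.
    raise etcd.EtcdConnectionFailed("connection failed")


def raise_etcdkeynotfound(*args, **kwargs):
    # Illustrative stub: the key being refreshed does not exist.
    raise etcd.EtcdKeyNotFound("key not found")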
Example #15
    def save(self, update=True, ttl=None):
        self.render()
        if "Message" not in self.__class__.__name__:
            # If local object.hash is equal to
            # central_store object.hash, return
            if self.hash_compare_with_central_store(ttl=ttl):
                return
        if update:
            current_obj = self.load()
            for attr, val in vars(self).iteritems():
                if isinstance(val, (types.FunctionType,
                                    types.BuiltinFunctionType,
                                    types.MethodType, types.BuiltinMethodType,
                                    types.UnboundMethodType)) or \
                        attr.startswith("_") or attr in ['value', 'list']:
                    continue

                if val is None and hasattr(current_obj, attr):
                    # if self.attr is None, use attr value from central
                    # store (i.e. current_obj.attr)
                    if getattr(current_obj, attr):
                        setattr(self, attr, getattr(current_obj, attr))

        self.updated_at = str(time_utils.now())
        for item in self.render():
            '''
                Note: Log messages in this file are wrapped in
                try-except blocks so that they still work when the
                node_agent has not been started and the namespaces
                are still being created.
            '''
            try:
                logger.log("debug", NS.publisher_id, {
                    "message":
                    "Writing %s to %s" % (item['key'], item['value'])
                })
            except KeyError:
                sys.stdout.write("Writing %s to %s \n" %
                                 (item['key'], item['value']))
            # convert list, dict (json) to python based on definitions
            _type = self._defs.get("attrs", {}).get(item['name'],
                                                    {}).get("type")
            if _type:
                if _type.lower() in ['json', 'list']:
                    if item['value']:
                        try:
                            item['value'] = json.dumps(item['value'])
                        except ValueError as ex:
                            _msg = "Error save() attr %s for object %s" % \
                                   (item['name'], self.__name__)
                            Event(
                                ExceptionMessage(priority="debug",
                                                 publisher=NS.publisher_id,
                                                 payload={
                                                     "message": _msg,
                                                     "exception": ex
                                                 }))
            try:
                NS._int.wclient.write(item['key'], item['value'], quorum=True)
            except (etcd.EtcdConnectionFailed, etcd.EtcdException):
                NS._int.wreconnect()
                NS._int.wclient.write(item['key'], item['value'], quorum=True)
        if ttl:
            etcd_utils.refresh(self.value, ttl)

        self.watch_attrs()
Example #16
    def save(self, update=True, ttl=None):
        self.render()
        if "Message" not in self.__class__.__name__:
            try:
                # Generate current in memory object hash
                self.hash = self._hash()
                _hash_key = "/{0}/hash".format(self.value)
                _stored_hash = None
                try:
                    _stored_hash = NS._int.client.read(_hash_key).value
                except (etcd.EtcdConnectionFailed, etcd.EtcdException) as ex:
                    if type(ex) != etcd.EtcdKeyNotFound:
                        NS._int.reconnect()
                        _stored_hash = NS._int.client.read(_hash_key).value
                if self.hash == _stored_hash:
                    # No changes in stored object and current object,
                    # don't save current object to central store
                    if ttl:
                        etcd_utils.refresh(self.value, ttl)
                    return
            except TypeError:
                # no hash for this object, save the current hash as is
                pass

        if update:
            current_obj = self.load()
            for attr, val in vars(self).iteritems():
                if isinstance(val, (types.FunctionType,
                                    types.BuiltinFunctionType,
                                    types.MethodType, types.BuiltinMethodType,
                                    types.UnboundMethodType)) or \
                        attr.startswith("_") or attr in ['value', 'list']:
                    continue

                if val is None and hasattr(current_obj, attr):
                    # if self.attr is None, use attr value from central
                    # store (i.e. current_obj.attr)
                    if getattr(current_obj, attr):
                        setattr(self, attr, getattr(current_obj, attr))

        self.updated_at = str(time_utils.now())
        for item in self.render():
            '''
                Note: Log messages in this file are wrapped in
                try-except blocks so that they still work when the
                node_agent has not been started and the namespaces
                are still being created.
            '''
            try:
                Event(
                    Message(priority="debug",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Writing %s to %s" %
                                (item['key'], item['value'])
                            }))
            except KeyError:
                sys.stdout.write("Writing %s to %s" %
                                 (item['key'], item['value']))
            # convert list, dict (json) to python based on definitions
            _type = self._defs.get("attrs", {}).get(item['name'],
                                                    {}).get("type")
            if _type:
                if _type.lower() in ['json', 'list']:
                    if item['value']:
                        try:
                            item['value'] = json.dumps(item['value'])
                        except ValueError as ex:
                            _msg = "Error save() attr %s for object %s" % \
                                   (item['name'], self.__name__)
                            Event(
                                ExceptionMessage(priority="debug",
                                                 publisher=NS.publisher_id,
                                                 payload={
                                                     "message": _msg,
                                                     "exception": ex
                                                 }))
            try:
                NS._int.wclient.write(item['key'], item['value'], quorum=True)
            except (etcd.EtcdConnectionFailed, etcd.EtcdException):
                NS._int.wreconnect()
                NS._int.wclient.write(item['key'], item['value'], quorum=True)
        if ttl:
            etcd_utils.refresh(self.value, ttl)
Example #17
    def volume_remove_brick_force(self, event):
        time.sleep(self.sync_interval)
        # Event returns the bricks list as a single space-separated string
        bricks = event['message']['bricks'].split(" ")
        try:
            for brick in bricks:
                # find fqdn using ip
                ip = socket.gethostbyname(brick.split(":/")[0])
                node_id = etcd_utils.read("indexes/ip/%s" % ip).value
                fqdn = NS.tendrl.objects.ClusterNodeContext(
                    node_id=node_id
                ).load().fqdn
                brick = fqdn + ":" + brick.split(":")[-1]
                fetched_brick = NS.tendrl.objects.GlusterBrick(
                    NS.tendrl_context.integration_id,
                    fqdn=brick.split(":/")[0],
                    brick_dir=brick.split(":/")[1].replace('/', '_')
                ).load()

                # delete brick
                etcd_utils.delete(
                    "clusters/{0}/Bricks/all/{1}/{2}".format(
                        NS.tendrl_context.integration_id,
                        brick.split(":/")[0],
                        brick.split(":/")[1].replace('/', '_')
                    ),
                    recursive=True,
                )

                # delete alert dashboard
                job_id = monitoring_utils.update_dashboard(
                    "%s|%s" % (event['message']['volume'], brick),
                    RESOURCE_TYPE_BRICK,
                    NS.tendrl_context.integration_id,
                    "delete"
                )
                logger.log(
                    "debug",
                    NS.publisher_id,
                    {
                        "message": "Update dashboard job %s "
                        "created" % job_id
                    }
                )

                # delete brick details from graphite
                job_id = monitoring_utils.delete_resource_from_graphite(
                    "%s|%s" % (event['message']['volume'], brick),
                    RESOURCE_TYPE_BRICK,
                    NS.tendrl_context.integration_id,
                    "delete"
                )
                logger.log(
                    "debug",
                    NS.publisher_id,
                    {
                        "message": "Delete resource from graphite job %s "
                        "created" % job_id
                    }
                )

            volume_brick_path = "clusters/{0}/Volumes/{1}/"\
                                "Bricks".format(
                                    NS.tendrl_context.integration_id,
                                    fetched_brick.vol_id,
                                )

            # remove all the brick information under the volume, as the
            # subvolume might have changed; let the next sync handle
            # updating the brick info
            etcd_utils.delete(
                volume_brick_path,
                recursive=True
            )

            _trigger_sync_key = 'clusters/%s/_sync_now' % \
                NS.tendrl_context.integration_id
            etcd_utils.write(_trigger_sync_key, 'true')
            etcd_utils.refresh(_trigger_sync_key, self.sync_interval)
        except etcd.EtcdKeyNotFound:
            logger.log(
                "debug",
                NS.publisher_id,
                {
                    "message": "Unable to delete bricks %s" % bricks
                }
            )
Example #18
    def volume_remove_brick_force(self, event):
        time.sleep(self.sync_interval)
        # Event returns the bricks list as a single space-separated string
        bricks = event['message']['bricks'].split(" ")
        try:
            for brick in bricks:
                # find fqdn using ip
                ip = socket.gethostbyname(brick.split(":/")[0])
                node_id = etcd_utils.read("indexes/ip/%s" % ip).value
                fqdn = NS.tendrl.objects.ClusterNodeContext(
                    node_id=node_id).load().fqdn
                brick = fqdn + ":" + brick.split(":")[-1]
                fetched_brick = NS.tendrl.objects.GlusterBrick(
                    NS.tendrl_context.integration_id,
                    fqdn=brick.split(":/")[0],
                    brick_dir=brick.split(":/")[1].replace('/', '_')).load()

                # delete brick
                etcd_utils.delete(
                    "clusters/{0}/Bricks/all/{1}/{2}".format(
                        NS.tendrl_context.integration_id,
                        brick.split(":/")[0],
                        brick.split(":/")[1].replace('/', '_')),
                    recursive=True,
                )

                # delete alert dashboard
                job_id = monitoring_utils.update_dashboard(
                    "%s|%s" % (event['message']['volume'], brick),
                    RESOURCE_TYPE_BRICK, NS.tendrl_context.integration_id,
                    "delete")
                logger.log(
                    "debug", NS.publisher_id,
                    {"message": "Update dashboard job %s "
                     "created" % job_id})

                # delete brick details from graphite
                job_id = monitoring_utils.delete_resource_from_graphite(
                    "%s|%s" % (event['message']['volume'], brick),
                    RESOURCE_TYPE_BRICK, NS.tendrl_context.integration_id,
                    "delete")
                logger.log(
                    "debug", NS.publisher_id, {
                        "message":
                        "Delete resource from graphite job %s "
                        "created" % job_id
                    })

            volume_brick_path = "clusters/{0}/Volumes/{1}/"\
                                "Bricks".format(
                                    NS.tendrl_context.integration_id,
                                    fetched_brick.vol_id,
                                )

            # remove all the brick information under the volume, as the
            # subvolume might have changed; let the next sync handle
            # updating the brick info
            etcd_utils.delete(volume_brick_path, recursive=True)

            _trigger_sync_key = 'clusters/%s/_sync_now' % \
                NS.tendrl_context.integration_id
            etcd_utils.write(_trigger_sync_key, 'true')
            etcd_utils.refresh(_trigger_sync_key, self.sync_interval)
        except etcd.EtcdKeyNotFound:
            logger.log("debug", NS.publisher_id,
                       {"message": "Unable to delete bricks %s" % bricks})
Example #19
    def run(self):
        logger.log(
            "info",
            NS.publisher_id,
            {"message": "%s running" % self.__class__.__name__}
        )

        gluster_brick_dir = NS.gluster.objects.GlusterBrickDir()
        gluster_brick_dir.save()

        cluster = NS.tendrl.objects.Cluster(
            integration_id=NS.tendrl_context.integration_id
        ).load()
        if cluster.cluster_network in [None, ""]:
            try:
                node_networks = NS.tendrl.objects.NodeNetwork().load_all()
                cluster.cluster_network = node_networks[0].subnet
                cluster.save()
            except etcd.EtcdKeyNotFound as ex:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": "Failed to sync cluster network details"}
                )
        _sleep = 0
        while not self._complete.is_set():
            # To detect out of band deletes
            # refresh gluster object inventory at config['sync_interval']
            SYNC_TTL = int(NS.config.data.get("sync_interval", 10)) + 100
            NS.node_context = NS.node_context.load()
            NS.tendrl_context = NS.tendrl_context.load()
            if _sleep > 5:
                _sleep = int(NS.config.data.get("sync_interval", 10))
            else:
                _sleep += 1

            try:
                _cluster = NS.tendrl.objects.Cluster(
                    integration_id=NS.tendrl_context.integration_id
                ).load()
                if (_cluster.status == "importing" and
                    _cluster.current_job['status'] == 'failed') or \
                    _cluster.status == "unmanaging" or \
                    _cluster.status == "set_volume_profiling":
                    continue

                _cnc = NS.tendrl.objects.ClusterNodeContext(
                    node_id=NS.node_context.node_id
                ).load()
                _cnc.is_managed = "yes"
                _cnc.save()
                subprocess.call(
                    [
                        'gluster',
                        'get-state',
                        'glusterd',
                        'odir',
                        '/var/run',
                        'file',
                        'glusterd-state',
                        'detail'
                    ]
                )
                raw_data = ini2json.ini_to_dict(
                    '/var/run/glusterd-state'
                )
                subprocess.call(['rm', '-rf', '/var/run/glusterd-state'])
                subprocess.call(
                    [
                        'gluster',
                        'get-state',
                        'glusterd',
                        'odir',
                        '/var/run',
                        'file',
                        'glusterd-state-vol-opts',
                        'volumeoptions'
                    ]
                )
                raw_data_options = ini2json.ini_to_dict(
                    '/var/run/glusterd-state-vol-opts'
                )
                subprocess.call(
                    [
                        'rm',
                        '-rf',
                        '/var/run/glusterd-state-vol-opts'
                    ]
                )
                sync_object = NS.gluster.objects.\
                    SyncObject(data=json.dumps(raw_data))
                sync_object.save()

                if "Peers" in raw_data:
                    index = 1
                    peers = raw_data["Peers"]
                    disconnected_hosts = []
                    while True:
                        try:
                            peer = NS.tendrl.\
                                objects.GlusterPeer(
                                    peer_uuid=peers['peer%s.uuid' % index],
                                    hostname=peers[
                                        'peer%s.primary_hostname' % index
                                    ],
                                    state=peers['peer%s.state' % index],
                                    connected=peers['peer%s.connected' % index]
                                )
                            try:
                                stored_peer_status = None
                                # find peer detail using hostname
                                ip = socket.gethostbyname(
                                    peers['peer%s.primary_hostname' % index]
                                )
                                node_id = etcd_utils.read(
                                    "/indexes/ip/%s" % ip
                                ).value
                                stored_peer = NS.tendrl.objects.GlusterPeer(
                                    peer_uuid=peers['peer%s.uuid' % index],
                                    node_id=node_id
                                ).load()
                                stored_peer_status = stored_peer.connected
                                current_status = peers[
                                    'peer%s.connected' % index
                                ]
                                if stored_peer_status and \
                                    current_status != stored_peer_status:
                                    msg = (
                                        "Peer %s in cluster %s "
                                        "is %s"
                                    ) % (
                                        peers[
                                            'peer%s.primary_hostname' %
                                            index
                                        ],
                                        _cluster.short_name,
                                        current_status
                                    )
                                    instance = "peer_%s" % peers[
                                        'peer%s.primary_hostname' % index
                                    ]
                                    event_utils.emit_event(
                                        "peer_status",
                                        current_status,
                                        msg,
                                        instance,
                                        'WARNING' if current_status !=
                                        'Connected'
                                        else 'INFO'
                                    )
                                    # save current status in actual peer
                                    # directory also
                                    stored_peer.connected = current_status
                                    stored_peer.save()
                                    # Disconnected host name to
                                    # raise brick alert
                                    if current_status.lower() == \
                                        "disconnected":
                                        disconnected_hosts.append(
                                            peers[
                                                'peer%s.primary_hostname' %
                                                index
                                            ]
                                        )
                            except etcd.EtcdKeyNotFound:
                                pass
                            SYNC_TTL += 5
                            peer.save(ttl=SYNC_TTL)
                            index += 1
                        except KeyError:
                            break
                    # Raise an alert for bricks when peer disconnected
                    # or node goes down
                    for disconnected_host in disconnected_hosts:
                        brick_status_alert(
                            disconnected_host
                        )
                if "Volumes" in raw_data:
                    index = 1
                    volumes = raw_data['Volumes']
                    # instantiating blivet class, this will be used for
                    # getting brick_device_details
                    b = blivet.Blivet()

                    # reset blivet during every sync to get latest information
                    # about storage devices in the machine
                    b.reset()
                    devicetree = b.devicetree
                    total_brick_count = 0
                    while True:
                        try:
                            b_count = sync_volumes(
                                volumes, index,
                                raw_data_options.get('Volume Options'),
                                SYNC_TTL + VOLUME_TTL,
                                _cluster.short_name,
                                devicetree
                            )
                            index += 1
                            SYNC_TTL += 1
                            total_brick_count += b_count - 1
                        except KeyError:
                            global VOLUME_TTL
                            # From the second sync onwards the volume TTL is
                            # SYNC_TTL + (no. of volumes) * 20 +
                            # (no. of bricks) * 10 + 160
                            if index > 1:
                                volume_count = index - 1
                                # When all nodes are down we mark all
                                # volumes as down; node status TTL is 160,
                                # so make sure volumes are present in etcd
                                # while raising the volume-down alert
                                VOLUME_TTL = (volume_count * 20) + (
                                    total_brick_count * 10) + 160
                            break
                    # populate the volume specific options
                    reg_ex = re.compile("^volume[0-9]+.options+")
                    options = {}
                    for key in volumes.keys():
                        if reg_ex.match(key):
                            options[key] = volumes[key]
                    for key in options.keys():
                        volname = key.split('.')[0]
                        vol_id = volumes['%s.id' % volname]
                        dict1 = {}
                        for k, v in options.items():
                            if k.startswith('%s.options' % volname):
                                dict1['.'.join(k.split(".")[2:])] = v
                                options.pop(k, None)
                        volume = NS.tendrl.objects.GlusterVolume(
                            NS.tendrl_context.integration_id,
                            vol_id=vol_id
                        ).load()
                        if volume.options is not None:
                            dest = dict(volume.options)
                            dest.update(dict1)
                            volume.options = dest
                            volume.save()

                # Sync cluster global details
                if "provisioner/%s" % NS.tendrl_context.integration_id \
                    in NS.node_context.tags:
                    all_volumes = NS.tendrl.objects.GlusterVolume(
                        NS.tendrl_context.integration_id
                    ).load_all() or []
                    volumes = []
                    for volume in all_volumes:
                        if not str(volume.deleted).lower() == "true" and \
                            volume.current_job.get('status', '') \
                            in ['', 'finished', 'failed'] and \
                            volume.vol_id not in [None, ''] and \
                            volume.name not in [None, '']:
                            # Only for the first sync, refresh the volume TTL;
                            # it increases based on the number of volumes
                            if _cnc.first_sync_done in [None, "no", ""]:
                                etcd_utils.refresh(
                                    volume.value,
                                    SYNC_TTL + VOLUME_TTL
                                )
                            volumes.append(volume)
                    cluster_status.sync_cluster_status(
                        volumes, SYNC_TTL + VOLUME_TTL
                    )
                    utilization.sync_utilization_details(volumes)
                    client_connections.sync_volume_connections(volumes)
                    georep_details.aggregate_session_status()
                    try:
                        evt.process_events()
                    except etcd.EtcdKeyNotFound:
                        pass
                    rebalance_status.sync_volume_rebalance_status(volumes)
                    rebalance_status.sync_volume_rebalance_estimated_time(
                        volumes
                    )
                    snapshots.sync_volume_snapshots(
                        raw_data['Volumes'],
                        int(NS.config.data.get(
                            "sync_interval", 10
                        )) + len(volumes) * 4
                    )
                    # update alert count
                    update_cluster_alert_count()
                # check and enable volume profiling
                if "provisioner/%s" % NS.tendrl_context.integration_id in \
                    NS.node_context.tags:
                    self._enable_disable_volume_profiling()

                _cluster = NS.tendrl.objects.Cluster(
                    integration_id=NS.tendrl_context.integration_id
                ).load()
                if _cluster.exists():
                    _cluster = _cluster.load()
                    _cluster.last_sync = str(tendrl_now())
                    # Mark the first sync done flag
                    _cnc = NS.tendrl.objects.ClusterNodeContext(
                        node_id=NS.node_context.node_id
                    ).load()
                    if _cnc.first_sync_done in [None, "no"]:
                        _cnc.first_sync_done = "yes"
                        _cnc.save()
                    if _cluster.current_job.get(
                        'status', ''
                    ) in ['', 'finished', 'failed'] and \
                        _cluster.status in [None, ""]:
                        _cluster.save()
            except Exception as ex:
                Event(
                    ExceptionMessage(
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={"message": "gluster sds state sync error",
                                 "exception": ex
                                 }
                    )
                )
            try:
                etcd_utils.read(
                    '/clusters/%s/_sync_now' %
                    NS.tendrl_context.integration_id
                )
                continue
            except etcd.EtcdKeyNotFound:
                pass

            time.sleep(_sleep)

        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "%s complete" % self.__class__.__name__}
        )
Example #20
 def save(self, update=True, ttl=None):
     self.invalidate_hash()
     super(GlobalDetails, self).save(update)
     status = self.value + "/status"
     if ttl:
         etcd_utils.refresh(status, ttl)
Example #21
 def save(self, update=True, ttl=None):
     super(ClusterNodeContext, self).save(update)
     status = self.value + "/status"
     if ttl:
         etcd_utils.refresh(status, ttl)
Example #22
    def run(self):
        logger.log(
            "info",
            NS.publisher_id,
            {"message": "%s running" % self.__class__.__name__}
        )

        gluster_brick_dir = NS.gluster.objects.GlusterBrickDir()
        gluster_brick_dir.save()

        cluster = NS.tendrl.objects.Cluster(
            integration_id=NS.tendrl_context.integration_id
        ).load()
        if cluster.cluster_network in [None, ""]:
            try:
                node_networks = NS.tendrl.objects.NodeNetwork().load_all()
                cluster.cluster_network = node_networks[0].subnet
                cluster.save()
            except etcd.EtcdKeyNotFound as ex:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": "Failed to sync cluster network details"}
                )
        _sleep = 0
        while not self._complete.is_set():
            # To detect out of band deletes
            # refresh gluster object inventory at config['sync_interval']
            SYNC_TTL = int(NS.config.data.get("sync_interval", 10)) + 100
            NS.node_context = NS.node_context.load()
            NS.tendrl_context = NS.tendrl_context.load()
            if _sleep > 5:
                _sleep = int(NS.config.data.get("sync_interval", 10))
            else:
                _sleep += 1

            try:
                _cluster = NS.tendrl.objects.Cluster(
                    integration_id=NS.tendrl_context.integration_id
                ).load()
                if (_cluster.status == "importing" and (
                    _cluster.current_job['status'] == 'failed')) or \
                    _cluster.status == "unmanaging" or \
                    _cluster.status == "set_volume_profiling":
                    time.sleep(_sleep)
                    continue

                _cnc = NS.tendrl.objects.ClusterNodeContext(
                    node_id=NS.node_context.node_id
                ).load()
                _cnc.is_managed = "yes"
                _cnc.save()
                subprocess.call(
                    [
                        'gluster',
                        'get-state',
                        'glusterd',
                        'odir',
                        '/var/run',
                        'file',
                        'glusterd-state',
                        'detail'
                    ]
                )
                raw_data = ini2json.ini_to_dict(
                    '/var/run/glusterd-state'
                )
                subprocess.call(['rm', '-rf', '/var/run/glusterd-state'])
                subprocess.call(
                    [
                        'gluster',
                        'get-state',
                        'glusterd',
                        'odir',
                        '/var/run',
                        'file',
                        'glusterd-state-vol-opts',
                        'volumeoptions'
                    ]
                )
                raw_data_options = ini2json.ini_to_dict(
                    '/var/run/glusterd-state-vol-opts'
                )
                subprocess.call(
                    [
                        'rm',
                        '-rf',
                        '/var/run/glusterd-state-vol-opts'
                    ]
                )
                sync_object = NS.gluster.objects.\
                    SyncObject(data=json.dumps(raw_data))
                sync_object.save()

                if "Peers" in raw_data:
                    index = 1
                    peers = raw_data["Peers"]
                    disconnected_hosts = []
                    while True:
                        try:
                            peer = NS.tendrl.\
                                objects.GlusterPeer(
                                    peer_uuid=peers['peer%s.uuid' % index],
                                    hostname=peers[
                                        'peer%s.primary_hostname' % index
                                    ],
                                    state=peers['peer%s.state' % index],
                                    connected=peers['peer%s.connected' % index]
                                )
                            try:
                                stored_peer_status = None
                                # find peer detail using hostname
                                ip = socket.gethostbyname(
                                    peers['peer%s.primary_hostname' % index]
                                )
                                node_id = etcd_utils.read(
                                    "/indexes/ip/%s" % ip
                                ).value
                                stored_peer = NS.tendrl.objects.GlusterPeer(
                                    peer_uuid=peers['peer%s.uuid' % index],
                                    node_id=node_id
                                ).load()
                                stored_peer_status = stored_peer.connected
                                current_status = peers[
                                    'peer%s.connected' % index
                                ]
                                if stored_peer_status and \
                                    current_status != stored_peer_status:
                                    msg = (
                                        "Peer %s in cluster %s "
                                        "is %s"
                                    ) % (
                                        peers[
                                            'peer%s.primary_hostname' %
                                            index
                                        ],
                                        _cluster.short_name,
                                        current_status
                                    )
                                    instance = "peer_%s" % peers[
                                        'peer%s.primary_hostname' % index
                                    ]
                                    event_utils.emit_event(
                                        "peer_status",
                                        current_status,
                                        msg,
                                        instance,
                                        'WARNING'
                                        if current_status != 'Connected'
                                        else 'INFO'
                                    )
                                    # save current status in actual peer
                                    # directory also
                                    stored_peer.connected = current_status
                                    stored_peer.save()
                                    # Disconnected host name to
                                    # raise brick alert
                                    if current_status.lower() == \
                                        "disconnected":
                                        disconnected_hosts.append(
                                            peers[
                                                'peer%s.primary_hostname' %
                                                index
                                            ]
                                        )
                            except etcd.EtcdKeyNotFound:
                                pass
                            SYNC_TTL += 5
                            peer.save(ttl=SYNC_TTL)
                            index += 1
                        except KeyError:
                            break
                    # Raise an alert for bricks when peer disconnected
                    # or node goes down
                    for disconnected_host in disconnected_hosts:
                        brick_status_alert(
                            disconnected_host
                        )
                if "Volumes" in raw_data:
                    # create devicetree using lsblk
                    devicetree = get_device_tree()
                    # find lvs
                    lvs = brick_utilization.get_lvs()
                    index = 1
                    volumes = raw_data['Volumes']
                    total_brick_count = 0
                    while True:
                        try:
                            b_count = sync_volumes(
                                volumes, index,
                                raw_data_options.get('Volume Options'),
                                SYNC_TTL + VOLUME_TTL,
                                _cluster.short_name,
                                devicetree,
                                lvs
                            )
                            index += 1
                            SYNC_TTL += 1
                            total_brick_count += b_count - 1
                        except KeyError:
                            global VOLUME_TTL
                            # From the second sync onwards the volume TTL is
                            # SYNC_TTL + (no. of volumes) * 20 +
                            # (no. of bricks) * 10 + 160
                            if index > 1:
                                volume_count = index - 1
                                # When all nodes are down we mark all
                                # volumes as down; node status TTL is 160,
                                # so make sure volumes are present in etcd
                                # while raising the volume-down alert
                                VOLUME_TTL = (volume_count * 20) + (
                                    total_brick_count * 10) + 160
                            break
                    # populate the volume specific options
                    reg_ex = re.compile("^volume[0-9]+.options+")
                    options = {}
                    for key in volumes.keys():
                        if reg_ex.match(key):
                            options[key] = volumes[key]
                    for key in options.keys():
                        volname = key.split('.')[0]
                        vol_id = volumes['%s.id' % volname]
                        dict1 = {}
                        for k, v in options.items():
                            if k.startswith('%s.options' % volname):
                                dict1['.'.join(k.split(".")[2:])] = v
                                options.pop(k, None)
                        volume = NS.tendrl.objects.GlusterVolume(
                            NS.tendrl_context.integration_id,
                            vol_id=vol_id
                        ).load()
                        if volume.options is not None:
                            dest = dict(volume.options)
                            dest.update(dict1)
                            volume.options = dest
                            volume.save()

                # Sync cluster global details
                if "provisioner/%s" % NS.tendrl_context.integration_id \
                    in NS.node_context.tags:
                    all_volumes = NS.tendrl.objects.GlusterVolume(
                        NS.tendrl_context.integration_id
                    ).load_all() or []
                    volumes = []
                    for volume in all_volumes:
                        if not str(volume.deleted).lower() == "true" and \
                            volume.current_job.get('status', '') \
                            in ['', 'finished', 'failed'] and \
                            volume.vol_id not in [None, ''] and \
                            volume.name not in [None, '']:
                            # Only for the first sync, refresh the volume TTL;
                            # it increases based on the number of volumes
                            if _cnc.first_sync_done in [None, "no", ""]:
                                etcd_utils.refresh(
                                    volume.value,
                                    SYNC_TTL + VOLUME_TTL
                                )
                            volumes.append(volume)
                    cluster_status.sync_cluster_status(
                        volumes, SYNC_TTL + VOLUME_TTL
                    )
                    utilization.sync_utilization_details(volumes)
                    client_connections.sync_volume_connections(volumes)
                    georep_details.aggregate_session_status()
                    try:
                        evt.process_events()
                    except etcd.EtcdKeyNotFound:
                        pass
                    rebalance_status.sync_volume_rebalance_status(volumes)
                    rebalance_status.sync_volume_rebalance_estimated_time(
                        volumes
                    )
                    snapshots.sync_volume_snapshots(
                        raw_data['Volumes'],
                        int(NS.config.data.get(
                            "sync_interval", 10
                        )) + len(volumes) * 4
                    )
                    # update alert count
                    update_cluster_alert_count()
                # check and enable volume profiling
                if "provisioner/%s" % NS.tendrl_context.integration_id in \
                    NS.node_context.tags:
                    self._update_volume_profiling()

                _cluster = NS.tendrl.objects.Cluster(
                    integration_id=NS.tendrl_context.integration_id
                ).load()
                if _cluster.exists():
                    _cluster = _cluster.load()
                    _cluster.last_sync = str(tendrl_now())
                    # Mark the first sync done flag
                    _cnc = NS.tendrl.objects.ClusterNodeContext(
                        node_id=NS.node_context.node_id
                    ).load()
                    if _cnc.first_sync_done in [None, "no"]:
                        _cnc.first_sync_done = "yes"
                        _cnc.save()
                    if _cluster.current_job.get(
                        'status', ''
                    ) in ['', 'finished', 'failed'] and \
                        _cluster.status in [None, ""]:
                        _cluster.save()
            except Exception as ex:
                Event(
                    ExceptionMessage(
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={"message": "gluster sds state sync error",
                                 "exception": ex
                                 }
                    )
                )
            try:
                etcd_utils.read(
                    '/clusters/%s/_sync_now' %
                    NS.tendrl_context.integration_id
                )
                continue
            except etcd.EtcdKeyNotFound:
                pass

            time.sleep(_sleep)

        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "%s complete" % self.__class__.__name__}
        )
Example #23
def sync(sync_ttl=None):
    try:
        tags = []
        # update node agent service details
        logger.log("debug", NS.publisher_id,
                   {"message": "node_sync, Updating Service data"})
        for service in TENDRL_SERVICES:
            s = NS.tendrl.objects.Service(service=service)
            if s.running:
                service_tag = NS.compiled_definitions.get_parsed_defs(
                )['namespace.tendrl']['tags'][service.strip("@*")]
                tags.append(service_tag)

                if service_tag == "tendrl/server":
                    tags.append("tendrl/monitor")
            s.save()

        if "tendrl/monitor" not in tags and \
            NS.tendrl_context.integration_id:
            _cluster = NS.tendrl.objects.Cluster(
                integration_id=NS.tendrl_context.integration_id).load()
            # Try to claim orphan "provisioner_%integration_id" tag
            _tag = "provisioner/%s" % _cluster.integration_id
            _is_new_provisioner = False
            NS.node_context = NS.tendrl.objects.NodeContext().load()
            if _tag not in NS.node_context.tags:
                try:
                    _index_key = "/indexes/tags/%s" % _tag
                    _node_id = json.dumps([NS.node_context.node_id])
                    etcd_utils.write(_index_key, _node_id, prevExist=False)
                    etcd_utils.refresh(_index_key, sync_ttl + 50)
                    tags.append(_tag)
                    _is_new_provisioner = True
                except etcd.EtcdAlreadyExist:
                    pass

        # updating node context with latest tags
        logger.log(
            "debug", NS.publisher_id,
            {"message": "node_sync, updating node context "
             "data with tags"})
        NS.node_context = NS.tendrl.objects.NodeContext().load()
        current_tags = list(NS.node_context.tags)
        tags += current_tags
        NS.node_context.tags = list(set(tags))
        NS.node_context.tags.sort()
        current_tags.sort()
        if NS.node_context.tags != current_tags:
            NS.node_context.save()

        if "tendrl/monitor" not in tags and \
            NS.tendrl_context.integration_id:
            _cluster = _cluster.load()
            if _is_new_provisioner and _cluster.is_managed == "yes":
                _msg = "node_sync, NEW provisioner node found! "\
                    "re-configuring monitoring (job-id: %s) on this node"
                payload = {
                    "tags": ["tendrl/node_%s" % NS.node_context.node_id],
                    "run": "tendrl.flows.ConfigureMonitoring",
                    "status": "new",
                    "parameters": {
                        'TendrlContext.integration_id':
                        NS.tendrl_context.integration_id
                    },
                    "type": "node"
                }
                _job_id = str(uuid.uuid4())
                NS.tendrl.objects.Job(job_id=_job_id,
                                      status="new",
                                      payload=payload).save()
                logger.log("debug", NS.publisher_id,
                           {"message": _msg % _job_id})

        # Update /indexes/tags/:tag = [node_ids]
        for tag in NS.node_context.tags:

            index_key = "/indexes/tags/%s" % tag
            _node_ids = []
            try:
                _node_ids = etcd_utils.read(index_key).value
                _node_ids = json.loads(_node_ids)
            except etcd.EtcdKeyNotFound:
                pass

            if _node_ids:
                if "provisioner" in tag:
                    # Check if this is a stale provisioner
                    if NS.node_context.node_id != _node_ids[0]:
                        NS.node_context.tags.remove(tag)
                        NS.node_context.save()
                        continue
                if NS.node_context.node_id in _node_ids:
                    if sync_ttl and len(_node_ids) == 1:
                        etcd_utils.refresh(index_key, sync_ttl + 50)

                    continue
                else:
                    _node_ids += [NS.node_context.node_id]
            else:
                _node_ids = [NS.node_context.node_id]
            _node_ids = list(set(_node_ids))

            etcd_utils.write(index_key, json.dumps(_node_ids))
            if sync_ttl and len(_node_ids) == 1:
                etcd_utils.refresh(index_key, sync_ttl + 50)
        logger.log("debug", NS.publisher_id,
                   {"message": "node_sync, Updating detected "
                    "platform"})
    except Exception as ex:
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={
                                 "message":
                                 "node_sync service and indexes "
                                 "sync failed: " + ex.message,
                                 "exception":
                                 ex
                             }))
Example #24
 def save(self, update=True, ttl=None):
     self.invalidate_hash()
     super(GlobalDetails, self).save(update)
     status = self.value + "/status"
     if ttl:
         etcd_utils.refresh(status, ttl)
Example #25
 def save(self, update=True, ttl=None):
     super(ClusterNodeContext, self).save(update)
     status = self.value + "/status"
     if ttl:
         etcd_utils.refresh(status, ttl)