Example #1
    def wrapper(self, *args, **kwargs):
        # TODO(kgriffs): Figure out a way to not have to rely on the
        # presence of `mongodb_conf`
        max_attempts = self.driver.mongodb_conf.max_reconnect_attempts
        sleep_sec = self.driver.mongodb_conf.reconnect_sleep

        last_ex = None
        for attempt in range(max_attempts):
            try:
                return func(self, *args, **kwargs)

            except errors.AutoReconnect as ex:
                LOG.warning(
                    _LW(u'Caught AutoReconnect, retrying the '
                        'call to {0}').format(func))

                last_ex = ex
                time.sleep(sleep_sec * (2**attempt))
        else:
            LOG.error(
                _LE(u'Caught AutoReconnect, maximum attempts '
                    'to {0} exceeded.').format(func))

            raise last_ex
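For context, this wrapper is the inner function of a retry decorator: `func` is the decorated storage-controller method, bound by an enclosing function that the snippet omits. Below is a minimal sketch of that enclosing decorator; the name `retries_on_autoreconnect` and the `functools.wraps` shape are assumptions, not shown in the snippet.

import functools


def retries_on_autoreconnect(func):
    # Hypothetical enclosing decorator; `wrapper` above is its inner
    # function. Applied to a controller method, it re-invokes `func`
    # with exponential backoff whenever pymongo raises AutoReconnect.
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        ...  # body as shown above

    return wrapper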
Example #2
        def register_api(driver, conf):
            if (deprecated and
                    api_version not in conf.enable_deprecated_api_versions):
                return None

            if deprecated:
                LOG.warning(_LW('Enabling API version %(version)s. '
                                'This version was marked as deprecated in '
                                '%(updated)s. Using it may expose security '
                                'issues, unexpected behavior or damage your '
                                'data.') % {'version': api_version,
                                            'updated': api_updated})
            return fn(driver, conf)
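The names `fn`, `deprecated`, `api_version` and `api_updated` are free variables here, closed over from an enclosing decorator factory. A plausible reconstruction of that factory, assuming a `version_info` dict of the shape implied by the snippet's lookups, is this sketch:

import functools


def api_version_manager(version_info):
    # Hypothetical factory; register_api above closes over these names.
    api_version = version_info['id']
    api_updated = version_info['updated']
    deprecated = version_info.get('status', 'CURRENT') == 'DEPRECATED'

    def wrapper(fn):
        @functools.wraps(fn)
        def register_api(driver, conf):
            ...  # body as shown above
        return register_api

    return wrapper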
Example #3
def setup(conf, binary, host):
    if conf.profiler.enabled:

        # NOTE(wangxiyuan): OSProfiler now supports several kinds of backends,
        # such as Ceilometer, Elasticsearch, Messaging and MongoDB.
        # 1. Ceilometer is only used for data collection, and Messaging is
        # only used for data transfer, so Ceilometer only works when
        # Messaging is enabled.
        # 2. Elasticsearch and MongoDB support both data collection and
        # transfer, so they can be used standalone.
        # 3. Which backend is used depends on the config option
        # "connection_string"; the default value is "messaging://".
        backend_uri = conf.profiler.connection_string
        if "://" not in backend_uri:
            backend_uri += "://"
        parsed_connection = urlparse.urlparse(backend_uri)
        backend_type = parsed_connection.scheme
        if backend_type == "messaging":
            import oslo_messaging
            _notifier = notifier.create(backend_uri, oslo_messaging, {},
                                        oslo_messaging.get_transport(conf),
                                        "Zaqar", binary, host)
        else:
            _notifier = notifier.create(backend_uri,
                                        project="Zaqar",
                                        service=binary,
                                        host=host)
        notifier.set(_notifier)
        LOG.warning(
            _LW("OSProfiler is enabled.\nThis means that anyone who "
                "knows any of the hmac_keys specified in "
                "/etc/zaqar/zaqar.conf can trace their requests.\nIn "
                "real life only the operator can read this file, so "
                "there is no security issue. Note that even if someone "
                "can trigger the profiler, only an admin user can "
                "retrieve trace information.\n"
                "To disable OSProfiler set in zaqar.conf:\n"
                "[profiler]\nenabled=false"))
        web.enable(conf.profiler.hmac_keys)
    else:
        web.disable()
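The backend choice hinges entirely on the URI scheme of connection_string. Here is a standalone sketch of that normalize-and-parse step, using the Python 3 stdlib urllib.parse in place of the six-style urlparse module the snippet relies on:

from urllib.parse import urlparse


def backend_type_for(connection_string):
    # Mirror the normalization above: a bare word such as "messaging"
    # becomes "messaging://" before parsing.
    if "://" not in connection_string:
        connection_string += "://"
    return urlparse(connection_string).scheme


assert backend_type_for("messaging") == "messaging"
assert backend_type_for("mongodb://localhost:27017") == "mongodb"
assert backend_type_for("elasticsearch://127.0.0.1:9200") == "elasticsearch"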
Example #4
    def wrapper(self, *args, **kwargs):
        # TODO(prashanthr_) : Try to reuse this utility. Violates DRY
        # Can pass config parameters into the decorator and create a
        # storage level utility.

        max_attempts = self.driver.redis_conf.max_reconnect_attempts
        sleep_sec = self.driver.redis_conf.reconnect_sleep

        last_ex = None
        for attempt in range(max_attempts):
            try:
                return func(self, *args, **kwargs)

            except redis.exceptions.ConnectionError as ex:
                # NOTE(kgriffs): redis-py will retry once itself,
                # but if the command cannot be sent the second time after
                # disconnecting and reconnecting, the error is raised
                # and we will catch it here.
                #
                # NOTE(kgriffs): When using a sentinel, if a master fails
                # the initial retry will gracefully fail over to the
                # new master if the sentinel failover delay is low enough;
                # if the delay is too long, then redis-py will get a
                # MasterNotFoundError (a subclass of ConnectionError) on
                # its retry, which will then just get raised and caught
                # here, in which case we will keep retrying until the
                # sentinel completes the failover and stops raising
                # MasterNotFoundError.

                last_ex = ex
                LOG.warning(
                    _LW(u'Caught ConnectionError, retrying the '
                        'call to {0}').format(func))

                time.sleep(sleep_sec * (2**attempt))
        else:
            LOG.error(
                _LE(u'Caught ConnectionError, maximum attempts '
                    'to {0} exceeded.').format(func))
            raise last_ex
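Because the sleep doubles on every attempt, the worst-case stall grows geometrically with max_reconnect_attempts. A quick back-of-envelope sketch, with hypothetical config values:

# Hypothetical values; the real ones come from redis_conf.
sleep_sec = 0.1
max_attempts = 5

worst_case = sum(sleep_sec * (2 ** attempt) for attempt in range(max_attempts))
print(worst_case)  # 0.1 + 0.2 + 0.4 + 0.8 + 1.6, i.e. about 3.1 seconds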
Example #5
    def post(self, queue_name, messages, client_uuid, project=None):
        # NOTE(flaper87): This method should be safe to retry on
        # autoreconnect, since we have a 2-step insert for messages.
        # The worst-case scenario is that we'll increase the counter
        # several times and we'd end up with some non-active messages.

        if not self._queue_ctrl.exists(queue_name, project):
            raise errors.QueueDoesNotExist(queue_name, project)

        # NOTE(flaper87): Make sure the counter exists. This method
        # is an upsert.
        self._get_counter(queue_name, project)
        now = timeutils.utcnow_ts()
        now_dt = datetime.datetime.utcfromtimestamp(now)
        collection = self._collection(queue_name, project)

        # Set the next basis marker for the first attempt.
        #
        # Note that we don't increment the counter right away because
        # if 2 concurrent posts happen and the one with the higher counter
        # ends before the one with the lower counter, there's a window
        # where a client paging through the queue may get the messages
        # with the higher counter and skip the previous ones. This would
        # make our FIFO guarantee unsound.
        next_marker = self._get_counter(queue_name, project)

        # Unique transaction ID to facilitate atomic batch inserts
        transaction = objectid.ObjectId()

        prepared_messages = [{
            PROJ_QUEUE: utils.scope_queue_name(queue_name, project),
            't': message['ttl'],
            'e': now_dt + datetime.timedelta(seconds=message['ttl']),
            'u': client_uuid,
            'c': {'id': None, 'e': now},
            'b': message['body'] if 'body' in message else {},
            'k': next_marker + index,
            'tx': transaction,
        } for index, message in enumerate(messages)]

        # NOTE(kgriffs): Don't take the time to do a 2-phase insert
        # if there is no way for it to partially succeed.
        if len(prepared_messages) == 1:
            transaction = None
            prepared_messages[0]['tx'] = None

        # Use a retry range for sanity, although we expect
        # to rarely, if ever, reach the maximum number of
        # retries.
        #
        # NOTE(kgriffs): With the default configuration (100 ms
        # max sleep, 1000 max attempts), the max stall time
        # before the operation is abandoned is 49.95 seconds.
        for attempt in self._retry_range:
            try:
                ids = collection.insert(prepared_messages, check_keys=False)

                # Log a message if we retried, for debugging perf issues
                if attempt != 0:
                    msgtmpl = _(u'%(attempts)d attempt(s) required to post '
                                u'%(num_messages)d messages to queue '
                                u'"%(queue)s" under project %(project)s')

                    LOG.debug(
                        msgtmpl,
                        dict(queue=queue_name,
                             attempts=attempt + 1,
                             num_messages=len(ids),
                             project=project))

                # Update the counter in preparation for the next batch
                #
                # NOTE(kgriffs): Due to the unique index on the messages
                # collection, competing inserts will fail as a whole,
                # and keep retrying until the counter is incremented
                # such that the competing markers will start at a
                # unique number, 1 past the max of the messages just
                # inserted above.
                self._inc_counter(queue_name, project, amount=len(ids))

                # NOTE(kgriffs): Finalize the insert once we can say that
                # all the messages made it. This makes bulk inserts
                # atomic, assuming queries filter out any non-finalized
                # messages.
                if transaction is not None:
                    collection.update({'tx': transaction},
                                      {'$set': {'tx': None}},
                                      upsert=False,
                                      multi=True)

                return [str(id_) for id_ in ids]

            except pymongo.errors.DuplicateKeyError:
                # TODO(kgriffs): Record stats of how often retries happen,
                # and how many attempts, on average, are required to insert
                # messages.

                # NOTE(kgriffs): This can be used in conjunction with the
                # log line, above, that is emitted after all messages have
                # been posted, to gauge how long it is taking for messages
                # to be posted to a given queue, or overall.
                #
                # TODO(kgriffs): Add transaction ID to help match up loglines
                if attempt == 0:
                    msgtmpl = _(u'First attempt failed while '
                                u'adding messages to queue '
                                u'"%(queue)s" under project %(project)s')

                    LOG.debug(msgtmpl, dict(queue=queue_name, project=project))

                # NOTE(kgriffs): Never retry past the point that competing
                # messages expire and are GC'd, since once they are gone,
                # the unique index no longer protects us from getting out
                # of order, which could cause an observer to miss this
                # message. The code below provides a sanity-check to ensure
                # this situation can not happen.
                elapsed = timeutils.utcnow_ts() - now
                if elapsed > MAX_RETRY_POST_DURATION:
                    msgtmpl = _LW(u'Exceeded maximum retry duration for queue '
                                  u'"%(queue)s" under project %(project)s')

                    LOG.warning(msgtmpl, dict(queue=queue_name,
                                              project=project))
                    break

                # Chill out for a moment to mitigate thrashing/thundering
                self._backoff_sleep(attempt)

                # NOTE(kgriffs): Perhaps we failed because a worker crashed
                # after inserting messages, but before incrementing the
                # counter; that would cause all future requests to stall,
                # since they would keep getting the same base marker that is
                # conflicting with existing messages, until the messages that
                # "won" expire, at which time we would end up reusing markers,
                # and that could make some messages invisible to an observer
                # that is querying with a marker that is larger than the ones
                # being reused.
                #
                # To mitigate this, we apply a heuristic to determine whether
                # a counter has stalled. We attempt to increment the counter,
                # but only if it hasn't been updated for a few seconds, which
                # should mean that nobody is left to update it!
                #
                # Note that we increment one at a time until the logjam is
                # broken, since we don't know how many messages were posted
                # by the worker before it crashed.
                next_marker = self._inc_counter(queue_name,
                                                project,
                                                window=COUNTER_STALL_WINDOW)

                # Retry the entire batch with a new sequence of markers.
                #
                # NOTE(kgriffs): Due to the unique index, and how
                # MongoDB works with batch requests, we will never
                # end up with a partially-successful update. The first
                # document in the batch will fail to insert, and the
                # remainder of the documents will not be attempted.
                if next_marker is None:
                    # NOTE(kgriffs): Usually we will end up here, since
                    # it should be rare that a counter becomes stalled.
                    next_marker = self._get_counter(queue_name, project)
                else:
                    msgtmpl = _LW(u'Detected a stalled message counter '
                                  u'for queue "%(queue)s" under '
                                  u'project %(project)s. '
                                  u'The counter was incremented to %(value)d.')

                    LOG.warning(
                        msgtmpl,
                        dict(queue=queue_name,
                             project=project,
                             value=next_marker))

                for index, message in enumerate(prepared_messages):
                    message['k'] = next_marker + index

            except Exception as ex:
                LOG.exception(ex)
                raise

        msgtmpl = _LW(u'Hit maximum number of attempts (%(max)s) for queue '
                      u'"%(queue)s" under project %(project)s')

        LOG.warning(
            msgtmpl,
            dict(max=self.driver.mongodb_conf.max_attempts,
                 queue=queue_name,
                 project=project))

        raise errors.MessageConflict(queue_name, project)
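The 49.95-second figure in the NOTE above is consistent with _backoff_sleep drawing a uniform random sleep in [0, 0.1) (averaging half of the 100 ms cap), with no sleep after the final attempt; that reading is an assumption, since _backoff_sleep itself is not shown here.

max_attempts = 1000   # default max attempts, per the NOTE above
max_sleep = 0.1       # default 100 ms max sleep

# 999 sleeps between 1000 attempts, averaging max_sleep / 2 each.
expected_stall = (max_attempts - 1) * (max_sleep / 2)
print(expected_stall)  # about 49.95 seconds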
Example #6
    def _inc_counter(self, queue_name, project=None, amount=1, window=None):
        """Increments the message counter and returns the new value.

        :param queue_name: Name of the queue to which the counter is scoped
        :param project: Queue's project name
        :param amount: (Default 1) Amount by which to increment the counter
        :param window: (Default None) A time window, in seconds, that
            must have elapsed since the counter was last updated, in
            order to increment the counter.

        :returns: Updated message counter value, or None if window
            was specified and the counter has already been updated
            within the given time period.

        :raises: storage.errors.QueueDoesNotExist
        """

        # NOTE(flaper87): If this `if` is True, it means we're
        # using MongoDB in the control plane. To avoid breaking
        # environments doing so already, we'll keep using the counter
        # in the mongodb queue_controller rather than the one in the
        # message_controller. This should go away, eventually
        if hasattr(self._queue_ctrl, '_inc_counter'):
            return self._queue_ctrl._inc_counter(queue_name, project, amount,
                                                 window)

        now = timeutils.utcnow_ts()

        update = {'$inc': {'c.v': amount}, '$set': {'c.t': now}}
        query = _get_scoped_query(queue_name, project)
        if window is not None:
            threshold = now - window
            query['c.t'] = {'$lt': threshold}

        while True:
            try:
                collection = self._collection(queue_name, project).stats
                doc = collection.find_one_and_update(
                    query, update,
                    return_document=pymongo.ReturnDocument.AFTER,
                    projection={'c.v': 1, '_id': 0})

                break
            except pymongo.errors.AutoReconnect as ex:
                LOG.exception(ex)

        if doc is None:
            if window is None:
                # NOTE(kgriffs): Since we did not filter by a time window,
                # the queue should have been found and updated. Perhaps
                # the queue has been deleted?
                message = _LW(u'Failed to increment the message '
                              u'counter for queue %(name)s and '
                              u'project %(project)s')
                message %= dict(name=queue_name, project=project)

                LOG.warning(message)

                raise errors.QueueDoesNotExist(queue_name, project)

            # NOTE(kgriffs): Assume the queue existed, but the counter
            # was recently updated, causing the range query on 'c.t' to
            # exclude the record.
            return None

        return doc['c']['v']
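To make the window semantics concrete, here is the shape of the query and update documents for a hypothetical call; the scoped-query key and value are stand-ins for whatever _get_scoped_query actually builds.

now = 1700000000     # hypothetical timeutils.utcnow_ts() value
amount = 1
window = 10          # only bump a counter untouched for > 10 seconds

update = {'$inc': {'c.v': amount}, '$set': {'c.t': now}}
query = {'p_q': 'acme/orders'}  # stand-in for _get_scoped_query(...)
query['c.t'] = {'$lt': now - window}

# find_one_and_update with this query matches the counter document only
# if its last-touched timestamp 'c.t' is older than the window;
# otherwise nothing matches and _inc_counter returns None.
print(query)
print(update)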