Exemplo n.º 1
0
    def aggregate_raw_times(self, key, service_name, max_batch_size=None):
        """ Aggregates values from a list living under a given key. Returns its
        min, max, mean and an overall usage count. 'max_batch_size' controls how
        many items will be fetched from the list so it's possible to fetch less
        items than its LLEN returns.

        key - Redis key of a list of raw response times (integers)
        service_name - name of the service the times belong to, used to look up
            its configured mean percentile
        max_batch_size - optional cap on how many items to fetch in one go
        """
        key_len = self.server.kvdb.conn.llen(key)
        if max_batch_size:
            batch_size = min(key_len, max_batch_size)
            if batch_size < key_len:
                msg = 'batch_size:`%s` < key_len:`%s`, max_batch_size:`%s`, key:`%s`, ' \
                'consider decreasing the job interval or increasing max_batch_size'
                # .warning - .warn is a deprecated alias
                self.logger.warning(msg, batch_size, key_len, max_batch_size, key)
        else:
            batch_size = key_len

        # Redis LRANGE treats its end index as inclusive, so fetching
        # 0..batch_size would return batch_size+1 items - one more than
        # max_batch_size allows. Also guard against batch_size == 0 because
        # an end index of -1 would mean 'fetch everything'.
        if batch_size:
            times = [
                int(elem)
                for elem in self.server.kvdb.conn.lrange(key, 0, batch_size - 1)
            ]
        else:
            times = []

        if times:
            # The service's configured mean percentile caps which samples
            # contribute to the mean, defaulting to 0 if not set.
            mean_percentile = int(
                self.server.kvdb.conn.hget(
                    KVDB.SERVICE_TIME_BASIC + service_name, 'mean_percentile')
                or 0)
            max_score = int(percentile(times, mean_percentile))

            return min(times), max(times), (tmean(times, limit_to=max_score)
                                            or 0), len(times)
        else:
            # No samples at all - everything is zero
            return 0, 0, 0, 0
Exemplo n.º 2
0
    def create_summary(self, target, *pattern_names):
        """ Builds an aggregated per-service summary for the current period
        ('target', e.g. by-day or by-week) out of all the keys matched by the
        patterns named in 'pattern_names', storing the result under an
        aggregated key on success.
        """
        try:

            now = datetime.utcnow()
            key_prefix = KVDB.SERVICE_SUMMARY_PREFIX_PATTERN.format(target)

            # Establish when the current period began and the key suffix
            # identifying it.
            if target == 'by-week':
                # Current week start
                start = parse_datetime(
                    (now + relativedelta(weekday=MO(-1))
                     ).strftime('%Y-%m-%d 00:00:00'))
                key_suffix = start.strftime(
                    DT_PATTERNS.SUMMARY_SUFFIX_PATTERNS[target])
            else:
                # Current day start
                start = parse_datetime(now.strftime('%Y-%m-%d 00:00:00'))
                key_suffix = now.strftime(
                    DT_PATTERNS.SUMMARY_SUFFIX_PATTERNS[target])
            total_seconds = (now - start).total_seconds()

            # One list of key patterns per requested pattern name
            all_patterns = [
                getattr(self, 'get_by_{}_patterns'.format(name))(now)
                for name in pattern_names
            ]

            services = {}

            for pattern in chain(*all_patterns):
                prefix, suffix = pattern.split('*')
                collected = self.collect_service_stats(
                    pattern, prefix, suffix[1:], None, False, False, False)

                # Merge what was just collected into the running per-service
                # aggregates.
                for service_name, values in collected.items():
                    per_service = services.setdefault(
                        service_name, deepcopy(DEFAULT_STATS))

                    for stat_name in STATS_KEYS:
                        value = values[stat_name]
                        if stat_name == 'usage':
                            per_service[stat_name] += value
                        elif stat_name == 'max':
                            per_service[stat_name] = max(
                                per_service[stat_name], value)
                        elif stat_name == 'mean':
                            per_service[stat_name].append(value)
                        elif stat_name == 'min':
                            per_service[stat_name] = min(
                                per_service[stat_name], value)

            # Collapse the collected means into a single value and compute the
            # usage rate over the period elapsed so far.
            for values in services.values():
                values['mean'] = round(tmean(values['mean']), 2)
                values['rate'] = round(values['usage'] / total_seconds, 2)

        except Exception:
            self.logger.debug('Could not store mean/rate. e=`%r`, locals=`%r`',
                              format_exc(), locals())

        else:
            self.hset_aggr_keys(services, key_prefix, key_suffix)
Exemplo n.º 3
0
    def collect_service_stats(self,
                              keys_pattern,
                              key_prefix,
                              key_suffix,
                              total_seconds,
                              suffix_needs_colon=True,
                              chop_off_service_name=True,
                              needs_rate=True):
        """ Collects per-service statistics from all Redis hashes matching
        'keys_pattern' and merges them into one dict keyed by service name.

        keys_pattern - Redis KEYS pattern selecting the hashes to read
        key_prefix/key_suffix - stripped off each key to recover the service name
        total_seconds - period length used to compute 'rate' (only read when
            needs_rate is True)
        suffix_needs_colon - whether to prepend ':' to key_suffix before stripping
        chop_off_service_name - whether to drop the last 3 characters of the
            derived service name
        needs_rate - whether to compute a per-second usage rate

        Returns a dict of {service_name: {'usage': .., 'max': .., 'mean': ..,
        'min': .., ['rate': ..]}}.
        """
        service_stats = {}
        if suffix_needs_colon:
            key_suffix = ':' + key_suffix

        for key in self.kvdb.conn.keys(keys_pattern):
            # The service name is whatever remains of the key after the
            # prefix and suffix are stripped.
            service_name = key.replace(key_prefix, '').replace(key_suffix, '')
            if chop_off_service_name:
                service_name = service_name[:-3]

            values = self.kvdb.conn.hgetall(key)

            stats = service_stats.setdefault(service_name, {})

            for name in STATS_KEYS:

                value = values.get(name)
                if value:
                    # rate/mean are fractional, everything else is integral
                    value = float(value) if name in ('rate', 'mean') else int(value)

                    # Lazily initialize each statistic with its identity value
                    if name not in stats:
                        if name == 'mean':
                            stats[name] = []
                        elif name == 'min':
                            stats[name] = maxint
                        else:
                            stats[name] = 0

                    if name == 'usage':
                        stats[name] += value
                    elif name == 'max':
                        stats[name] = max(stats[name], value)
                    elif name == 'mean':
                        stats[name].append(value)
                    elif name == 'min':
                        stats[name] = min(stats[name], value)

        for service_name, values in service_stats.items():
            # Collapse the list of means into a single mean value
            mean = values.get('mean')
            if mean:
                values['mean'] = tmean(mean)

            if needs_rate:
                values['rate'] = values['usage'] / total_seconds

        return service_stats
Exemplo n.º 4
0
    def handle(self):
        """ Aggregates each service's raw response times into its all-time
        min/max/mean stored under KVDB.SERVICE_TIME_BASIC, then trims the
        processed items off the raw list.
        """

        if not self.stats_enabled():
            return

        #
        # Sample config values
        #
        # global_slow_threshold=120
        # max_batch_size=99999
        #
        config = Bunch()
        for item in self.request.payload.splitlines():
            item = item.strip()
            # Ignore blank lines so a trailing newline in the payload
            # does not blow up the unpacking below
            if not item:
                continue
            # Split on the first '=' only, in case a value itself contains one
            key, value = item.split('=', 1)
            config[key] = int(value)

        for key in self.server.kvdb.conn.keys(KVDB.SERVICE_TIME_RAW + '*'):

            service_name = key.replace(KVDB.SERVICE_TIME_RAW, '')

            # All-time values aggregated so far, defaulting to 0 for services
            # seen for the first time
            current_mean = float(
                self.server.kvdb.conn.hget(
                    KVDB.SERVICE_TIME_BASIC + service_name, 'mean_all_time')
                or 0)
            current_min = float(
                self.server.kvdb.conn.hget(
                    KVDB.SERVICE_TIME_BASIC + service_name, 'min_all_time')
                or 0)
            current_max = float(
                self.server.kvdb.conn.hget(
                    KVDB.SERVICE_TIME_BASIC + service_name, 'max_all_time')
                or 0)

            batch_min, batch_max, batch_mean, batch_total = self.aggregate_raw_times(
                key, service_name, config.max_batch_size)

            self.server.kvdb.conn.hset(
                KVDB.SERVICE_TIME_BASIC + service_name, 'mean_all_time',
                tmean(batch_mean, limit_to=current_mean))
            self.server.kvdb.conn.hset(KVDB.SERVICE_TIME_BASIC + service_name,
                                       'min_all_time',
                                       min(current_min, batch_min))
            self.server.kvdb.conn.hset(KVDB.SERVICE_TIME_BASIC + service_name,
                                       'max_all_time',
                                       max(current_max, batch_max))

            # Services use RPUSH for storing raw times so we are safe to use LTRIM
            # in order to do away with the already processed ones
            self.server.kvdb.conn.ltrim(key, batch_total, -1)
Exemplo n.º 5
0
    def get_stats(self,
                  start,
                  stop,
                  service='*',
                  n=None,
                  n_type=None,
                  needs_trends=True,
                  stats_key_prefix=None,
                  suffixes=None):
        """ Returns statistics for a given interval, as defined by 'start' and 'stop'.
        service default to '*' for all services in that period and may be set to return
        a one-element list of information regarding that particular service. Setting 'n'
        to a positive integer will make it return only top n services.

        This is a generator - StatsElem objects are yielded one by one.

        start/stop - interval boundaries, parseable by parse_datetime
        n/n_type - optional top-n filtering, delegated to self.yield_top_n
        needs_trends - whether to also build comma-joined trend strings
        stats_key_prefix - Redis key prefix, defaults to self.stats_key_prefix
        suffixes - time-slice key suffixes; computed from the interval if not given
        """
        if not stats_key_prefix:
            stats_key_prefix = self.stats_key_prefix

        stats_elems = {}
        all_services_stats = Bunch({'usage': 0, 'time': 0})

        # All mean values
        mean_all_services_list = []

        # A mean value of all the mean values (mean_all_services_list)
        mean_all_services = 0

        start = parse_datetime(start)
        stop = parse_datetime(stop)
        delta = (stop - start)

        # timedelta.total_seconds is unavailable on very old Pythons,
        # hence the fallback to .seconds
        if hasattr(delta, 'total_seconds'):
            delta_seconds = delta.total_seconds()
        else:
            delta_seconds = delta.seconds

        if not suffixes:
            suffixes = self.get_suffixes(start, stop)

        # We make several passes. First two passes are made over Redis keys, one gathers the services, if any at all,
        # and another one actually collects statistics for each service found. Next pass, a partly optional one,
        # computes trends for mean response time and service usage. Another one computes each of the service's
        # average rate and updates other attributes basing on values collected in the previous step.
        # Optionally, the last one will pick only top n elements of a given type (top mean response time
        # or top usage).

        # 1st pass
        for suffix in suffixes:
            keys = self.server.kvdb.conn.keys('{}{}:{}'.format(
                stats_key_prefix, service, suffix))
            for key in keys:
                # Strip prefix and suffix off the key to recover the service name
                service_name = key.replace(stats_key_prefix,
                                           '').replace(':{}'.format(suffix),
                                                       '')

                stats_elem = StatsElem(service_name)
                stats_elems[service_name] = stats_elem

                # When building statistics, we can't expect there will be data for all the time
                # elems built above so to guard against it, this is a dictionary whose keys are the
                # said elems and values are mean/usage for each elem. The values will remain
                # 0/0.0 if there is no data for the time elem, which may mean that in this
                # particular time slice the service wasn't invoked at all.
                stats_elem.expected_time_elems = OrderedDict(
                    (elem, Bunch({
                        'mean': 0,
                        'usage': 0.0
                    })) for elem in suffixes)

        # 2nd pass
        # NOTE: this loop rebinds the 'service' parameter - it is not used
        # as the original argument below this point
        for service, stats_elem in stats_elems.items():
            for suffix in suffixes:
                key = '{}{}:{}'.format(stats_key_prefix, service, suffix)

                # We can convert all the values to floats here to ease with computing
                # all the stuff and convert them still to integers later on, when necessary.
                key_values = Bunch(((name, float(value)) for (
                    name,
                    value) in iteritems(self.server.kvdb.conn.hgetall(key))))

                if key_values:

                    # Total time spent in this service in this time slice
                    time = (key_values.usage * key_values.mean)
                    stats_elem.time += time

                    mean_all_services_list.append(key_values.mean)
                    all_services_stats.time += time
                    all_services_stats.usage += key_values.usage

                    stats_elem.min_resp_time = min(stats_elem.min_resp_time,
                                                   key_values.min)
                    stats_elem.max_resp_time = max(stats_elem.max_resp_time,
                                                   key_values.max)

                    for attr in ('mean', 'usage'):
                        stats_elem.expected_time_elems[suffix][
                            attr] = key_values[attr]

        # Overall mean across all services, formatted to a whole number,
        # or plain 0 when no data was collected at all
        mean_all_services = '{:.0f}'.format(
            tmean(mean_all_services_list)) if mean_all_services_list else 0

        # 3rd pass (partly optional)
        for stats_elem in stats_elems.values():

            stats_elem.mean_all_services = mean_all_services
            stats_elem.all_services_time = int(all_services_stats.time)
            stats_elem.all_services_usage = int(all_services_stats.usage)

            values = stats_elem.expected_time_elems.values()

            # Per-time-slice trends as integers, in suffix order
            stats_elem.mean_trend_int = [int(elem.mean) for elem in values]
            stats_elem.usage_trend_int = [int(elem.usage) for elem in values]

            stats_elem.mean = float('{:.2f}'.format(
                tmean(stats_elem.mean_trend_int)))
            stats_elem.usage = sum(stats_elem.usage_trend_int)
            stats_elem.rate = float('{:.2f}'.format(
                sum(stats_elem.usage_trend_int) / delta_seconds))

            self.set_percent_of_all_services(all_services_stats, stats_elem)

            if needs_trends:
                # Comma-joined string form of the trends, for display purposes
                stats_elem.mean_trend = ','.join(
                    str(elem) for elem in stats_elem.mean_trend_int)
                stats_elem.usage_trend = ','.join(
                    str(elem) for elem in stats_elem.usage_trend_int)

        # 4th pass (optional)
        if n:
            for stats_elem in self.yield_top_n(n, n_type, stats_elems):
                yield stats_elem

        else:
            for stats_elem in stats_elems.values():
                yield stats_elem