Пример #1
0
def calculate_degradation_area(table, smooth, etalon_stats, etalon_threshold):
    """Calculate statistics for every degradation cluster

    Clusters are located with ``find_clusters`` over the smoothed durations:
    the filter maps a smoothed value to 0 when its absolute value is below
    ``etalon_threshold`` and to 1 otherwise. For each cluster the durations
    of the non-error points lying strictly inside its time range are compared
    with the etalon via a confidence interval for the difference of means.

    Clusters that contain no such points are skipped, since no sample
    statistics can be computed for them.

    :param table: source data points (``error``, ``time`` and ``duration``
        attributes are read)
    :param smooth: smoothed points (``time``, ``duration`` and ``var``
        attributes are read)
    :param etalon_stats: etalon statistics (``mean``, ``var`` and ``count``
        attributes are read)
    :param etalon_threshold: threshold used by the cluster filter
    :return: [DegradationClusterStats]; empty when there are no more than
        WINDOW_SIZE non-error points
    """
    table = [p for p in table if not p.error]  # rm errors
    if len(table) <= WINDOW_SIZE:
        return []

    mean_times = [p.time for p in smooth]
    mean_durations = [p.duration for p in smooth]
    mean_vars = [p.var for p in smooth]

    clusters = find_clusters(
        mean_durations,
        filter_fn=lambda y: 0 if abs(y) < etalon_threshold else 1)

    degradation_cluster_stats = []
    for cluster in clusters:
        # cluster bounds are used as indexes into the smoothed series
        start_idx = int(cluster.inf)
        end_idx = int(cluster.sup)
        start_time = mean_times[start_idx]
        end_time = mean_times[end_idx]

        # durations of the source points strictly inside the cluster
        point_durations = [p.duration for p in table
                           if start_time < p.time < end_time]
        if not point_durations:
            # Without any sample the mean and variance are NaN and the
            # sample size is a zero divisor; reporting such a cluster would
            # propagate NaN into every aggregate built on top of it.
            continue

        duration = end_time - start_time
        # NOTE(review): the slice excludes end_idx although end_idx is used
        # as an inclusive index for end_time -- confirm this is intended.
        var = np.mean(mean_vars[start_idx: end_idx])

        # calculate difference between means
        # http://onlinestatbook.com/2/tests_of_means/difference_means.html
        anomaly_mean = np.mean(point_durations)
        anomaly_var = np.var(point_durations)
        se = math.sqrt(anomaly_var / len(point_durations) +
                       etalon_stats.var / etalon_stats.count)
        dof = etalon_stats.count + len(point_durations) - 2
        mean_diff = anomaly_mean - etalon_stats.mean
        conf_interval = stats.t.interval(0.95, dof, loc=mean_diff, scale=se)

        # the second component is the average half-width of the interval
        lower_width = mean_diff - conf_interval[0]
        upper_width = conf_interval[1] - mean_diff
        degradation = types.MeanVar(
            mean_diff, np.mean([lower_width, upper_width]))
        degradation_ratio = types.MeanVar(
            anomaly_mean / etalon_stats.mean,
            np.mean([lower_width / etalon_stats.mean,
                     upper_width / etalon_stats.mean]))

        # pass arguments lazily so formatting happens only if DEBUG is on
        logging.debug('Mean diff: %s', mean_diff)
        logging.debug('Conf int: %s', conf_interval)

        degradation_cluster_stats.append(types.DegradationClusterStats(
            start=start_time, end=end_time,
            duration=types.MeanVar(duration, var),
            degradation=degradation, degradation_ratio=degradation_ratio,
            count=len(point_durations)
        ))

    return degradation_cluster_stats
Пример #2
0
def indexed_interval_to_time_interval(table, src_interval):
    """Convert an interval of table indexes into a time interval

    Each side of the interval that has a neighbouring point is widened by
    half of the time gap to that neighbour. The sum of the two margins is
    passed as the second component of the duration ``MeanVar``.

    :param table: [DataRow] source data
    :param src_interval: interval of array indexes
    :return: ClusterStats
    """
    first = int(src_interval.inf)
    last = int(src_interval.sup)

    # margin towards the previous point; none for the very first point
    left_margin = 0
    if first > 0:
        left_margin = (table[first].time - table[first - 1].time) / 2

    # margin towards the next point; none for the very last point
    right_margin = 0
    if last < len(table) - 1:
        right_margin = (table[last + 1].time - table[last].time) / 2

    start_time = table[first].time - left_margin
    end_time = table[last].time + right_margin

    # points whose time falls into the widened interval, bounds included
    inside = [p for p in table if start_time <= p.time <= end_time]

    return types.ClusterStats(
        start=start_time, end=end_time, count=len(inside),
        duration=types.MeanVar(end_time - start_time,
                               left_margin + right_margin))
Пример #3
0
def process_all_runs(runs):
    """Aggregate the results of all runs from a Rally raw data report

    Every run is processed with ``process_one_run``; per-run totals of
    downtime, time-to-recover, degradation and degradation ratio are then
    combined into summary values. A summary value stays ``None`` when no
    run contributed data for it.

    :param runs: collection of Rally runs
    :return: SummaryResult
    """
    def mean_with_error(totals, variances):
        # mean of per-run totals paired with its error estimate
        if not totals:
            return None
        mean = np.mean(totals)
        error = math.sqrt((sum(variances) + np.var(totals)) / len(totals))
        return types.MeanVar(mean, error)

    run_results = []
    downtime_statistic, downtime_var = [], []
    ttr_statistic, ttr_var = [], []
    degradation_statistic, degradation_var = [], []
    degradation_ratio_statistic, degradation_ratio_var = [], []

    for one_run in runs:
        result = process_one_run(one_run)
        run_results.append(result)

        # total downtime of the run; recorded only if errors were found
        downtime_total = 0
        for area in result.error_area:
            downtime_total += area.duration.statistic
            downtime_var.append(area.duration.var)
        if result.error_area:
            downtime_statistic.append(downtime_total)

        # totals over degradation areas; recorded only if any were found
        ttr_total = 0
        degradation_total = 0
        ratio_total = 0
        for area in result.degradation_area:
            ttr_total += area.duration.statistic
            ttr_var.append(area.duration.var)
            degradation_total += area.degradation.statistic
            degradation_var.append(area.degradation.var)
            ratio_total += area.degradation_ratio.statistic
            degradation_ratio_var.append(area.degradation_ratio.var)
        if result.degradation_area:
            ttr_statistic.append(ttr_total)
            degradation_statistic.append(degradation_total)
            degradation_ratio_statistic.append(ratio_total)

    downtime = mean_with_error(downtime_statistic, downtime_var)
    mttr = mean_with_error(ttr_statistic, ttr_var)

    if degradation_statistic:
        degradation = types.MeanVar(np.mean(degradation_statistic),
                                    np.mean(degradation_var))
        degradation_ratio = types.MeanVar(
            np.mean(degradation_ratio_statistic),
            np.mean(degradation_ratio_var))
    else:
        degradation = None
        degradation_ratio = None

    return types.SummaryResult(run_results=run_results, mttr=mttr,
                               degradation=degradation,
                               degradation_ratio=degradation_ratio,
                               downtime=downtime)