Example 1
def autoaggregate_ts(timeseries, autoaggregate_value):
    """
    This is a utility function used to autoaggregate a timeseries.  If a
    timeseries data set has 6 datapoints per minute but only one data value
    every minute then autoaggregate will aggregate every autoaggregate_value.
    """
    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug :: autoaggregate_ts at %s seconds' % str(autoaggregate_value))

    aggregated_timeseries = []

    if len(timeseries) < 60:
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: autoaggregate_ts - timeseries less than 60 datapoints, TooShort')
        raise TooShort()

    int_end_timestamp = int(timeseries[-1][0])
    last_hour = int_end_timestamp - 3600
    last_timestamp = int_end_timestamp
    next_timestamp = last_timestamp - int(autoaggregate_value)
    start_timestamp = last_hour

    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug :: autoaggregate_ts - aggregating from %s to %s' % (str(start_timestamp), str(int_end_timestamp)))

    valid_timestamps = False
    try:
        valid_timeseries = int_end_timestamp - start_timestamp
        if valid_timeseries == 3600:
            valid_timestamps = True
    except Exception as e:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        logger.error('error: %s' % e)
        aggregated_timeseries = []
        return aggregated_timeseries

    if valid_timestamps:
        try:
            # Check sane variables otherwise we can just hang here in a while loop
            while int(next_timestamp) > int(start_timestamp):
                # scipy.array was a deprecated alias of numpy.array
                value = np.sum(np.array([int(x[1]) for x in timeseries if x[0] <= last_timestamp and x[0] > next_timestamp]))
                aggregated_timeseries.append((last_timestamp, value))
                last_timestamp = next_timestamp
                next_timestamp = last_timestamp - autoaggregate_value
            aggregated_timeseries.reverse()
            return aggregated_timeseries
        except Exception as e:
            logger.error('Algorithm error: %s' % traceback.format_exc())
            logger.error('error: %s' % e)
            aggregated_timeseries = []
            return aggregated_timeseries
    else:
        logger.error('could not aggregate - timestamps not valid for aggregation')
        aggregated_timeseries = []
        return aggregated_timeseries
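
The loop above walks backwards from the newest timestamp in steps of
autoaggregate_value seconds and sums every datapoint that falls inside each
step. A minimal, self-contained sketch of the same idea on hypothetical
synthetic data (the TooShort guard, logging and the 3600-second validity
check are omitted):

import numpy as np

def aggregate_sketch(timeseries, bucket_seconds):
    """Sum datapoints into bucket_seconds buckets over the last hour."""
    aggregated = []
    last_ts = int(timeseries[-1][0])
    start_ts = last_ts - 3600
    next_ts = last_ts - int(bucket_seconds)
    while next_ts > start_ts:
        # every datapoint with next_ts < timestamp <= last_ts lands in this bucket
        value = np.sum([int(v) for ts, v in timeseries if next_ts < ts <= last_ts])
        aggregated.append((last_ts, value))
        last_ts = next_ts
        next_ts = last_ts - int(bucket_seconds)
    aggregated.reverse()  # oldest first, as the original returns it
    return aggregated

# one datapoint every 10 seconds for an hour, aggregated into 60-second buckets
now = 1600000000
series = [(now - 3600 + i * 10, 1) for i in range(361)]
print(aggregate_sketch(series, 60)[0])  # each bucket sums 6 datapoints -> value 6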
Example 2
def run_selected_algorithm(timeseries, metric_name):
    """
    Filter timeseries and run selected algorithm.
    """

    # @added 20180807 - Feature #2492: alert on stale metrics
    # Determine if a metric has stopped sending data and if so add to the
    # analyzer.alert_on_stale_metrics Redis set
    if ALERT_ON_STALE_METRICS:
        add_to_alert_on_stale_metrics = False
        # @modified 20180816 - Feature #2492: alert on stale metrics
        # Added try and except to prevent some errors that are encountered between
        # 00:14 and 00:17 on some days
        # Traceback (most recent call last):
        # File "/opt/skyline/github/skyline/skyline/analyzer/analyzer.py", line 394, in spin_process
        # anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)
        # File "/opt/skyline/github/skyline/skyline/analyzer/algorithms.py", line 530, in run_selected_algorithm
        # if int(time()) - int(timeseries[-1][0]) >= ALERT_ON_STALE_PERIOD:
        # IndexError: list index out of range
        try:
            if int(time()) - int(timeseries[-1][0]) >= ALERT_ON_STALE_PERIOD:
                add_to_alert_on_stale_metrics = True
        except:
            # @modified 20180816 - Feature #2492: alert on stale metrics
            add_to_alert_on_stale_metrics = False
        try:
            if int(time()) - int(timeseries[-1][0]) >= STALE_PERIOD:
                add_to_alert_on_stale_metrics = False
        except:
            add_to_alert_on_stale_metrics = False

        if add_to_alert_on_stale_metrics:
            try:
                redis_conn.ping()
            except:
                from redis import StrictRedis
                if REDIS_PASSWORD:
                    redis_conn = StrictRedis(
                        password=REDIS_PASSWORD,
                        unix_socket_path=REDIS_SOCKET_PATH)
                else:
                    redis_conn = StrictRedis(
                        unix_socket_path=REDIS_SOCKET_PATH)
            try:
                redis_conn.sadd('analyzer.alert_on_stale_metrics', metric_name)
            except:
                pass

    # Get rid of short series
    if len(timeseries) < MIN_TOLERABLE_LENGTH:
        raise TooShort()

    # Get rid of stale series
    if time() - timeseries[-1][0] > STALE_PERIOD:
        raise Stale()

    # Get rid of boring series
    if len(set(item[1] for item in
               timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
        raise Boring()

    # RUN_OPTIMIZED_WORKFLOW - replaces the original ensemble method:
    # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
    # which runs all timeseries through all ALGORITHMS
    final_ensemble = []
    number_of_algorithms_triggered = 0
    number_of_algorithms_run = 0
    number_of_algorithms = len(ALGORITHMS)
    maximum_false_count = number_of_algorithms - CONSENSUS + 1
    # logger.info('the maximum_false_count is %s, above which CONSENSUS cannot be achieved' % (str(maximum_false_count)))
    consensus_possible = True
    # DEVELOPMENT: this is for a development version of analyzer only
    if skyline_app == 'analyzer_dev':
        time_all_algorithms = True
    else:
        time_all_algorithms = False

    algorithm_tmp_file_prefix = '%s/%s.' % (SKYLINE_TMP_DIR, skyline_app)

    for algorithm in ALGORITHMS:
        if consensus_possible:

            if send_algorithm_run_metrics:
                algorithm_count_file = '%s%s.count' % (
                    algorithm_tmp_file_prefix, algorithm)
                algorithm_timings_file = '%s%s.timings' % (
                    algorithm_tmp_file_prefix, algorithm)

            run_algorithm = []
            run_algorithm.append(algorithm)
            number_of_algorithms_run += 1
            if send_algorithm_run_metrics:
                start = timer()
            try:
                algorithm_result = [
                    globals()[test_algorithm](timeseries)
                    for test_algorithm in run_algorithm
                ]
            except:
                # logger.error('%s failed' % (algorithm))
                algorithm_result = [None]

            if send_algorithm_run_metrics:
                end = timer()
                with open(algorithm_count_file, 'a') as f:
                    f.write('1\n')
                with open(algorithm_timings_file, 'a') as f:
                    f.write('%.6f\n' % (end - start))
        else:
            algorithm_result = [False]
            # logger.info('CONSENSUS NOT ACHIEVABLE - skipping %s' % (str(algorithm)))

        if algorithm_result.count(True) == 1:
            result = True
            number_of_algorithms_triggered += 1
            # logger.info('algorithm %s triggered' % (str(algorithm)))
        elif algorithm_result.count(False) == 1:
            result = False
        elif algorithm_result.count(None) == 1:
            result = None
        else:
            result = False

        final_ensemble.append(result)

        if not RUN_OPTIMIZED_WORKFLOW:
            continue

        if time_all_algorithms:
            continue

        if ENABLE_ALL_ALGORITHMS_RUN_METRICS:
            continue

        # true_count = final_ensemble.count(True)
        # false_count = final_ensemble.count(False)
        # logger.info('current false_count %s' % (str(false_count)))

        if final_ensemble.count(False) >= maximum_false_count:
            consensus_possible = False
            # logger.info('CONSENSUS cannot be reached as %s algorithms have already not been triggered' % (str(false_count)))
            # skip_algorithms_count = number_of_algorithms - number_of_algorithms_run
            # logger.info('skipping %s algorithms' % (str(skip_algorithms_count)))

    # logger.info('final_ensemble: %s' % (str(final_ensemble)))

    try:
        # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
        ensemble = final_ensemble

        threshold = len(ensemble) - CONSENSUS
        if ensemble.count(False) <= threshold:
            if ENABLE_SECOND_ORDER:
                if is_anomalously_anomalous(metric_name, ensemble,
                                            timeseries[-1][1]):
                    return True, ensemble, timeseries[-1][1]
            else:
                return True, ensemble, timeseries[-1][1]

        return False, ensemble, timeseries[-1][1]
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        return False, [], 1
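
Example 2 short-circuits once CONSENSUS can no longer be reached: with
number_of_algorithms algorithms and CONSENSUS required triggers, as soon as
maximum_false_count of them have returned False the remaining algorithms are
skipped. A small sketch of that arithmetic (the function name is
illustrative, not from the Skyline codebase):

def consensus_still_possible(ensemble, number_of_algorithms, consensus):
    """True while enough algorithms remain to reach `consensus` True results."""
    # once this many have returned False, CONSENSUS is unreachable
    maximum_false_count = number_of_algorithms - consensus + 1
    return ensemble.count(False) < maximum_false_count

# 9 algorithms with a CONSENSUS of 6: the 4th False makes consensus impossible
print(consensus_still_possible([False] * 3, 9, 6))  # True
print(consensus_still_possible([False] * 4, 9, 6))  # False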
Example 3
def run_selected_algorithm(timeseries, metric_name, metric_expiration_time,
                           metric_min_average, metric_min_average_seconds,
                           metric_trigger, alert_threshold, metric_alerters,
                           autoaggregate, autoaggregate_value, algorithm):
    """
    Filter timeseries and run selected algorithm.
    """

    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug :: assigning in algorithms.py - %s, %s' %
                    (metric_name, algorithm))

    # Get rid of short series
    # @modified 20190312 - Task #2862: Allow Boundary to analyse short time series
    #                      https://github.com/earthgecko/skyline/issues/88
    # Only raise TooShort if the algorithm is detect_drop_off_cliff
    if algorithm == 'detect_drop_off_cliff':
        if len(timeseries) < MIN_TOLERABLE_LENGTH:
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug('debug :: TooShort - %s, %s' %
                             (metric_name, algorithm))
            raise TooShort()

    # Get rid of stale series
    if time() - timeseries[-1][0] > STALE_PERIOD:
        if ENABLE_BOUNDARY_DEBUG:
            logger.debug('debug :: Stale - %s, %s' % (metric_name, algorithm))
        raise Stale()

    # Get rid of boring series
    if algorithm == 'detect_drop_off_cliff' or algorithm == 'less_than':
        if len(set(item[1] for item in
                   timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug('debug :: Boring - %s, %s' %
                             (metric_name, algorithm))
            raise Boring()

    if autoaggregate:
        if ENABLE_BOUNDARY_DEBUG:
            logger.debug('debug :: auto aggregating %s for %s' %
                         (metric_name, algorithm))
        try:
            agg_timeseries = autoaggregate_ts(timeseries, autoaggregate_value)
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug(
                    'debug :: aggregated_timeseries returned %s for %s' %
                    (metric_name, algorithm))
        except Exception as e:
            agg_timeseries = []
            if ENABLE_BOUNDARY_DEBUG:
                logger.error('Algorithm error: %s' % traceback.format_exc())
                logger.error('error: %s' % e)
                logger.debug(
                    'debug error - autoaggregate exception %s for %s' %
                    (metric_name, algorithm))

        if len(agg_timeseries) > 10:
            timeseries = agg_timeseries
        else:
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug('debug :: TooShort - %s, %s' %
                             (metric_name, algorithm))
            raise TooShort()

    # @modified 20190312 - Task #2862: Allow Boundary to analyse short time series
    #                      https://github.com/earthgecko/skyline/issues/88
    # if len(timeseries) < 10:
    if len(timeseries) < 1:
        if ENABLE_BOUNDARY_DEBUG:
            logger.debug(
                'debug :: timeseries too short - %s - timeseries length - %s' %
                (metric_name, str(len(timeseries))))
        raise TooShort()

    try:
        ensemble = [
            globals()[algorithm](timeseries, metric_name,
                                 metric_expiration_time, metric_min_average,
                                 metric_min_average_seconds, metric_trigger)
        ]
        if ensemble.count(True) == 1:
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug(
                    # @modified 20200624 - Task #3594: Add timestamp to ENABLE_BOUNDARY_DEBUG output
                    #                      Feature #3532: Sort all time series
                    # Added timestamp to debug output
                    # 'debug :: anomalous datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s' % (
                    #     str(timeseries[-1][1]),
                    'debug :: anomalous at %s with datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s'
                    %
                    (str(timeseries[-1][0]), str(timeseries[-1][1]),
                     str(metric_name), str(metric_expiration_time),
                     str(metric_min_average), str(metric_min_average_seconds),
                     str(metric_trigger), str(alert_threshold),
                     str(metric_alerters), str(algorithm)))
            return (True, ensemble, timeseries[-1][1], metric_name,
                    metric_expiration_time, metric_min_average,
                    metric_min_average_seconds, metric_trigger,
                    alert_threshold, metric_alerters, algorithm)
        else:
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug(
                    # @modified 20200624 - Task #3594: Add timestamp to ENABLE_BOUNDARY_DEBUG output
                    #                      Feature #3532: Sort all time series
                    # Added timestamp to debug output
                    # 'debug :: not anomalous datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s' % (
                    #     str(timeseries[-1][1]),
                    'debug :: not anomalous at %s with datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s'
                    %
                    (str(timeseries[-1][0]), str(timeseries[-1][1]),
                     str(metric_name), str(metric_expiration_time),
                     str(metric_min_average), str(metric_min_average_seconds),
                     str(metric_trigger), str(alert_threshold),
                     str(metric_alerters), str(algorithm)))
            return (False, ensemble, timeseries[-1][1], metric_name,
                    metric_expiration_time, metric_min_average,
                    metric_min_average_seconds, metric_trigger,
                    alert_threshold, metric_alerters, algorithm)
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        return (False, [], 1, metric_name, metric_expiration_time,
                metric_min_average, metric_min_average_seconds,
                metric_trigger, alert_threshold, metric_alerters, algorithm)
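
Unlike the analyzer variants, this Boundary version dispatches a single
algorithm by name via globals() and threads the per-metric parameters through
to it. A minimal sketch of that dispatch pattern, assuming a hypothetical
less_than implementation with the signature the ensemble line above expects:

def less_than(timeseries, metric_name, metric_expiration_time,
              metric_min_average, metric_min_average_seconds, metric_trigger):
    # anomalous if the latest datapoint is below the configured trigger
    return timeseries[-1][1] < metric_trigger

# dispatch by name, exactly as the ensemble construction above does
algorithm = 'less_than'
series = [(1600000000, 12), (1600000060, 3)]
ensemble = [globals()[algorithm](series, 'test.metric', 3600, 0, 1200, 5)]
print(ensemble)  # [True] because 3 < 5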
Example 4
def run_selected_batch_algorithm(timeseries, metric_name,
                                 run_negatives_present):
    """
    Filter timeseries and run selected algorithm.
    """

    try:
        from settings import BATCH_PROCESSING_STALE_PERIOD
        # @modified 20200816 - Feature #3678:  SNAB - anomalyScore
        # Renamed to avoid confusion
        # STALE_PERIOD = int(BATCH_PROCESSING_STALE_PERIOD)
        BATCH_PROCESSING_STALE_PERIOD = int(BATCH_PROCESSING_STALE_PERIOD)
    except:
        BATCH_PROCESSING_STALE_PERIOD = 86400

    # Get rid of short series
    if len(timeseries) < MIN_TOLERABLE_LENGTH:
        raise TooShort()

    # Get rid of stale series
    # @modified 20200816 - Feature #3678:  SNAB - anomalyScore
    # Renamed to avoid confusion
    # if time() - timeseries[-1][0] > BATCH_PROCESSING_STALE_PERIOD:
    if time() - timeseries[-1][0] > BATCH_PROCESSING_STALE_PERIOD:
        raise Stale()

    # Get rid of boring series
    if len(set(item[1] for item in
               timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
        raise Boring()

    # RUN_OPTIMIZED_WORKFLOW - replaces the original ensemble method:
    # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
    # which runs all timeseries through all ALGORITHMS
    final_ensemble = []
    number_of_algorithms_triggered = 0
    number_of_algorithms_run = 0
    number_of_algorithms = len(ALGORITHMS)
    maximum_false_count = number_of_algorithms - CONSENSUS + 1
    # logger.info('the maximum_false_count is %s, above which CONSENSUS cannot be achieved' % (str(maximum_false_count)))
    consensus_possible = True

    time_all_algorithms = False

    algorithm_tmp_file_prefix = '%s/%s.' % (SKYLINE_TMP_DIR, skyline_app)

    # @added 20200607 - Feature #3566: custom_algorithms
    algorithms_run = []
    custom_consensus_override = False
    custom_consensus_values = []
    run_3sigma_algorithms = True
    run_3sigma_algorithms_overridden_by = []
    custom_algorithm = None
    # @modified 20200817 - Bug #3652: Handle multiple metrics in base_name conversion
    # base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
    if metric_name.startswith(FULL_NAMESPACE):
        base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
    else:
        base_name = metric_name
    if CUSTOM_ALGORITHMS:
        custom_algorithms_to_run = {}
        try:
            custom_algorithms_to_run = get_custom_algorithms_to_run(
                skyline_app, base_name, CUSTOM_ALGORITHMS,
                DEBUG_CUSTOM_ALGORITHMS)
            if DEBUG_CUSTOM_ALGORITHMS:
                if custom_algorithms_to_run:
                    logger.debug(
                        'algorithms :: debug :: custom algorithms ARE RUN on %s'
                        % (str(base_name)))
        except:
            logger.error('error :: get_custom_algorithms_to_run :: %s' %
                         traceback.format_exc())
            custom_algorithms_to_run = {}
        for custom_algorithm in custom_algorithms_to_run:
            if consensus_possible:
                algorithm = custom_algorithm
                debug_logging = False
                try:
                    debug_logging = custom_algorithms_to_run[custom_algorithm][
                        'debug_logging']
                except:
                    debug_logging = False
                if DEBUG_CUSTOM_ALGORITHMS:
                    debug_logging = True
                if send_algorithm_run_metrics:
                    algorithm_count_file = '%s%s.count' % (
                        algorithm_tmp_file_prefix, algorithm)
                    algorithm_timings_file = '%s%s.timings' % (
                        algorithm_tmp_file_prefix, algorithm)
                run_algorithm = []
                run_algorithm.append(algorithm)
                number_of_algorithms += 1
                number_of_algorithms_run += 1
                if send_algorithm_run_metrics:
                    start = timer()
                if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                    logger.debug(
                        'debug :: algorithms :: running custom algorithm %s on %s'
                        % (str(algorithm), str(base_name)))
                    start_debug_timer = timer()
                run_custom_algorithm_on_timeseries = None
                try:
                    from custom_algorithms import run_custom_algorithm_on_timeseries
                    if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                        logger.debug(
                            'debug :: algorithms :: loaded run_custom_algorithm_on_timeseries'
                        )
                except:
                    if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                        logger.error(traceback.format_exc())
                        logger.error(
                            'error :: algorithms :: failed to load run_custom_algorithm_on_timeseries'
                        )
                result = None
                anomalyScore = None
                if run_custom_algorithm_on_timeseries:
                    try:
                        result, anomalyScore = run_custom_algorithm_on_timeseries(
                            skyline_app, getpid(), base_name, timeseries,
                            custom_algorithm,
                            custom_algorithms_to_run[custom_algorithm],
                            DEBUG_CUSTOM_ALGORITHMS)
                        algorithm_result = [result]
                        if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                            logger.debug(
                                'debug :: algorithms :: run_custom_algorithm_on_timeseries run with result - %s, anomalyScore - %s'
                                % (str(result), str(anomalyScore)))
                    except:
                        if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                            logger.error(traceback.format_exc())
                            logger.error(
                                'error :: algorithms :: failed to run custom_algorithm %s on %s'
                                % (custom_algorithm, base_name))
                        result = None
                        algorithm_result = [None]
                else:
                    if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                        logger.error(
                            'error :: debug :: algorithms :: run_custom_algorithm_on_timeseries was not loaded so was not run'
                        )
                if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                    end_debug_timer = timer()
                    logger.debug(
                        'debug :: algorithms :: ran custom algorithm %s on %s with result of (%s, %s) in %.6f seconds'
                        % (str(algorithm), str(base_name), str(result),
                           str(anomalyScore),
                           (end_debug_timer - start_debug_timer)))
                algorithms_run.append(algorithm)
                if send_algorithm_run_metrics:
                    end = timer()
                    with open(algorithm_count_file, 'a') as f:
                        f.write('1\n')
                    with open(algorithm_timings_file, 'a') as f:
                        f.write('%.6f\n' % (end - start))
            else:
                algorithm_result = [None]
                algorithms_run.append(algorithm)

            if algorithm_result.count(True) == 1:
                result = True
                number_of_algorithms_triggered += 1
            elif algorithm_result.count(False) == 1:
                result = False
            elif algorithm_result.count(None) == 1:
                result = None
            else:
                result = False
            final_ensemble.append(result)
            custom_consensus = None
            algorithms_allowed_in_consensus = []
            # @added 20200605 - Feature #3566: custom_algorithms
            # Allow only single or multiple custom algorithms to run and allow
            # a custom algorithm to specify not to run 3sigma algorithms
            custom_run_3sigma_algorithms = True
            try:
                custom_run_3sigma_algorithms = custom_algorithms_to_run[
                    custom_algorithm]['run_3sigma_algorithms']
            except:
                custom_run_3sigma_algorithms = True
            if not custom_run_3sigma_algorithms and result:
                run_3sigma_algorithms = False
                run_3sigma_algorithms_overridden_by.append(custom_algorithm)
                if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                    logger.debug(
                        'debug :: algorithms :: run_3sigma_algorithms is False on %s for %s'
                        % (custom_algorithm, base_name))
            if result:
                try:
                    custom_consensus = custom_algorithms_to_run[
                        custom_algorithm]['consensus']
                    if custom_consensus == 0:
                        custom_consensus = int(CONSENSUS)
                    else:
                        custom_consensus_values.append(custom_consensus)
                except:
                    custom_consensus = int(CONSENSUS)
                try:
                    algorithms_allowed_in_consensus = custom_algorithms_to_run[
                        custom_algorithm]['algorithms_allowed_in_consensus']
                except:
                    algorithms_allowed_in_consensus = []
                if custom_consensus == 1:
                    consensus_possible = False
                    custom_consensus_override = True
                    logger.info(
                        'algorithms :: overriding the CONSENSUS as custom algorithm %s overrides on %s'
                        % (str(algorithm), str(base_name)))
                # TODO - figure out how to handle consensus overrides if
                #        multiple custom algorithms are used
    if DEBUG_CUSTOM_ALGORITHMS:
        if not run_3sigma_algorithms:
            logger.debug('algorithms :: not running 3 sigma algorithms')
        if len(run_3sigma_algorithms_overridden_by) > 0:
            logger.debug(
                'algorithms :: run_3sigma_algorithms overridden by %s' %
                (str(run_3sigma_algorithms_overridden_by)))

    # @added 20200425 - Feature #3508: ionosphere.untrainable_metrics
    # Added negatives_found
    negatives_found = False

    # @added 20200817 - Feature #3684: ROOMBA_BATCH_METRICS_CUSTOM_DURATIONS
    #                   Feature #3650: ROOMBA_DO_NOT_PROCESS_BATCH_METRICS
    #                   Feature #3480: batch_processing
    #                   Feature #3678:  SNAB - anomalyScore
    # Allow for custom durations on namespaces
    use_full_duration = int(FULL_DURATION) + 0
    if ROOMBA_BATCH_METRICS_CUSTOM_DURATIONS:
        for metric_namespace, custom_full_duration in ROOMBA_BATCH_METRICS_CUSTOM_DURATIONS:
            if metric_namespace in base_name:
                use_full_duration = custom_full_duration
    detect_drop_off_cliff_trigger = False

    for algorithm in ALGORITHMS:
        # @modified 20200607 - Feature #3566: custom_algorithms
        # Added run_3sigma_algorithms to allow only single or multiple custom
        # algorithms to run and allow a custom algorithm to specify not to
        # run 3sigma algorithms.
        # if consensus_possible:
        if consensus_possible and run_3sigma_algorithms:
            if send_algorithm_run_metrics:
                algorithm_count_file = '%s%s.count' % (
                    algorithm_tmp_file_prefix, algorithm)
                algorithm_timings_file = '%s%s.timings' % (
                    algorithm_tmp_file_prefix, algorithm)

            run_algorithm = []
            run_algorithm.append(algorithm)
            number_of_algorithms_run += 1
            if send_algorithm_run_metrics:
                start = timer()
            try:
                # @added 20200817 - Feature #3684: ROOMBA_BATCH_METRICS_CUSTOM_DURATIONS
                #                   Feature #3650: ROOMBA_DO_NOT_PROCESS_BATCH_METRICS
                #                   Feature #3480: batch_processing
                #                   Feature #3678:  SNAB - anomalyScore
                # Allow for custom durations on namespaces
                # algorithm_result = [globals()[test_algorithm](timeseries) for test_algorithm in run_algorithm]
                algorithm_result = [
                    globals()[test_algorithm](timeseries, use_full_duration)
                    for test_algorithm in run_algorithm
                ]
            except:
                # logger.error('%s failed' % (algorithm))
                algorithm_result = [None]

            # @added 20200607 - Feature #3566: custom_algorithms
            algorithms_run.append(algorithm)

            if send_algorithm_run_metrics:
                end = timer()
                with open(algorithm_count_file, 'a') as f:
                    f.write('1\n')
                with open(algorithm_timings_file, 'a') as f:
                    f.write('%.6f\n' % (end - start))
        else:
            algorithm_result = [None]
            algorithms_run.append(algorithm)

        if algorithm_result.count(True) == 1:
            result = True
            number_of_algorithms_triggered += 1
            # logger.info('algorithm %s triggered' % (str(algorithm)))
        elif algorithm_result.count(False) == 1:
            result = False
        elif algorithm_result.count(None) == 1:
            result = None
        else:
            result = False

        final_ensemble.append(result)

        if not RUN_OPTIMIZED_WORKFLOW:
            continue

        if time_all_algorithms:
            continue

        if ENABLE_ALL_ALGORITHMS_RUN_METRICS:
            continue

        # true_count = final_ensemble.count(True)
        # false_count = final_ensemble.count(False)
        # logger.info('current false_count %s' % (str(false_count)))

        if final_ensemble.count(False) >= maximum_false_count:
            consensus_possible = False
            # logger.info('CONSENSUS cannot be reached as %s algorithms have already not been triggered' % (str(false_count)))
            # skip_algorithms_count = number_of_algorithms - number_of_algorithms_run
            # logger.info('skipping %s algorithms' % (str(skip_algorithms_count)))

    # logger.info('final_ensemble: %s' % (str(final_ensemble)))

    try:
        # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
        ensemble = final_ensemble

        # @modified 20200607 - Feature #3566: custom_algorithms
        # threshold = len(ensemble) - CONSENSUS
        if custom_consensus_override:
            threshold = len(ensemble) - 1
        else:
            threshold = len(ensemble) - CONSENSUS

        if ensemble.count(False) <= threshold:

            # @added 20200425 - Feature #3508: ionosphere.untrainable_metrics
            # Only run a negatives_present check if it is anomalous, there
            # is no need to check unless it is related to an anomaly
            if run_negatives_present:
                try:
                    # @added 20200817 - Feature #3684: ROOMBA_BATCH_METRICS_CUSTOM_DURATIONS
                    #                   Feature #3650: ROOMBA_DO_NOT_PROCESS_BATCH_METRICS
                    #                   Feature #3480: batch_processing
                    #                   Feature #3678:  SNAB - anomalyScore
                    # Allow for custom durations on namespaces
                    # negatives_found = negatives_present(timeseries)
                    negatives_found = negatives_present(
                        timeseries, use_full_duration)
                except:
                    logger.error('Algorithm error: negatives_present :: %s' %
                                 traceback.format_exc())
                    negatives_found = False

            # @modified 20200425 - Feature #3508: ionosphere.untrainable_metrics
            # return True, ensemble, timeseries[-1][1]
            # @modified 20200607 - Feature #3566: custom_algorithms
            # Added algorithms_run
            # return True, ensemble, timeseries[-1][1], negatives_found
            # @modified 20200815 - Feature #3678: SNAB - anomalyScore
            # Added the number_of_algorithms to calculate anomalyScore from
            # return True, ensemble, timeseries[-1][1], negatives_found, algorithms_run
            return (True, ensemble, timeseries[-1][1], negatives_found,
                    algorithms_run, number_of_algorithms)

        # @modified 20200425 - Feature #3508: ionosphere.untrainable_metrics
        # return False, ensemble, timeseries[-1][1]
        # @modified 20200607 - Feature #3566: custom_algorithms
        # Added algorithms_run
        # @modified 20200815 - Feature #3678: SNAB - anomalyScore
        # Added the number_of_algorithms to calculate anomalyScore from
        # return False, ensemble, timeseries[-1][1], negatives_found, algorithms_run
        return (False, ensemble, timeseries[-1][1], negatives_found,
                algorithms_run, number_of_algorithms)
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        # @modified 20200425 - Feature #3508: ionosphere.untrainable_metrics
        # return False, [], 1
        # @modified 20200607 - Feature #3566: custom_algorithms
        # Added algorithms_run
        # return False, ensemble, timeseries[-1][1], negatives_found, algorithms_run
        # @modified 20200815 - Feature #3678: SNAB - anomalyScore
        # Added the number_of_algorithms to calculate anomalyScore from
        # return False, [], 1, negatives_found, algorithms_run
        return False, [], 1, negatives_found, algorithms_run, 0
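
The comments above note that number_of_algorithms was added to the return
tuple "to calculate anomalyScore from". One plausible caller-side
calculation, sketched here as an assumption rather than taken from the
Skyline source:

def anomaly_score(ensemble, number_of_algorithms):
    """Fraction of the available algorithms that triggered (0.0 to 1.0)."""
    if not number_of_algorithms:
        return 0.0
    return float(ensemble.count(True)) / float(number_of_algorithms)

# e.g. 7 of 9 algorithms triggered
print(anomaly_score([True] * 7 + [False] * 2, 9))  # 0.777...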
Example 5
def run_selected_batch_algorithm(timeseries, metric_name,
                                 run_negatives_present):
    """
    Filter timeseries and run selected algorithm.
    """

    try:
        from settings import BATCH_PROCESSING_STALE_PERIOD
        STALE_PERIOD = BATCH_PROCESSING_STALE_PERIOD
    except:
        STALE_PERIOD = 86400

    # Get rid of short series
    if len(timeseries) < MIN_TOLERABLE_LENGTH:
        raise TooShort()

    # Get rid of stale series
    if time() - timeseries[-1][0] > STALE_PERIOD:
        raise Stale()

    # Get rid of boring series
    if len(set(item[1] for item in
               timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
        raise Boring()

    # RUN_OPTIMIZED_WORKFLOW - replaces the original ensemble method:
    # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
    # which runs all timeseries through all ALGORITHMS
    final_ensemble = []
    number_of_algorithms_triggered = 0
    number_of_algorithms_run = 0
    number_of_algorithms = len(ALGORITHMS)
    maximum_false_count = number_of_algorithms - CONSENSUS + 1
    # logger.info('the maximum_false_count is %s, above which CONSENSUS cannot be achieved' % (str(maximum_false_count)))
    consensus_possible = True

    time_all_algorithms = False

    algorithm_tmp_file_prefix = '%s/%s.' % (SKYLINE_TMP_DIR, skyline_app)

    # @added 20200425 - Feature #3508: ionosphere.untrainable_metrics
    # Added negatives_found
    negatives_found = False

    for algorithm in ALGORITHMS:
        if consensus_possible:

            if send_algorithm_run_metrics:
                algorithm_count_file = '%s%s.count' % (
                    algorithm_tmp_file_prefix, algorithm)
                algorithm_timings_file = '%s%s.timings' % (
                    algorithm_tmp_file_prefix, algorithm)

            run_algorithm = []
            run_algorithm.append(algorithm)
            number_of_algorithms_run += 1
            if send_algorithm_run_metrics:
                start = timer()
            try:
                algorithm_result = [
                    globals()[test_algorithm](timeseries)
                    for test_algorithm in run_algorithm
                ]
            except:
                # logger.error('%s failed' % (algorithm))
                algorithm_result = [None]

            if send_algorithm_run_metrics:
                end = timer()
                with open(algorithm_count_file, 'a') as f:
                    f.write('1\n')
                with open(algorithm_timings_file, 'a') as f:
                    f.write('%.6f\n' % (end - start))
        else:
            algorithm_result = [False]
            # logger.info('CONSENSUS NOT ACHIEVABLE - skipping %s' % (str(algorithm)))

        if algorithm_result.count(True) == 1:
            result = True
            number_of_algorithms_triggered += 1
            # logger.info('algorithm %s triggered' % (str(algorithm)))
        elif algorithm_result.count(False) == 1:
            result = False
        elif algorithm_result.count(None) == 1:
            result = None
        else:
            result = False

        final_ensemble.append(result)

        if not RUN_OPTIMIZED_WORKFLOW:
            continue

        if time_all_algorithms:
            continue

        if ENABLE_ALL_ALGORITHMS_RUN_METRICS:
            continue

        # true_count = final_ensemble.count(True)
        # false_count = final_ensemble.count(False)
        # logger.info('current false_count %s' % (str(false_count)))

        if final_ensemble.count(False) >= maximum_false_count:
            consensus_possible = False
            # logger.info('CONSENSUS cannot be reached as %s algorithms have already not been triggered' % (str(false_count)))
            # skip_algorithms_count = number_of_algorithms - number_of_algorithms_run
            # logger.info('skipping %s algorithms' % (str(skip_algorithms_count)))

    # logger.info('final_ensemble: %s' % (str(final_ensemble)))

    try:
        # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
        ensemble = final_ensemble

        threshold = len(ensemble) - CONSENSUS
        if ensemble.count(False) <= threshold:

            # @added 20200425 - Feature #3508: ionosphere.untrainable_metrics
            # Only run a negatives_present check if it is anomalous, there
            # is no need to check unless it is related to an anomaly
            if run_negatives_present:
                try:
                    negatives_found = negatives_present(timeseries)
                except:
                    logger.error('Algorithm error: negatives_present :: %s' %
                                 traceback.format_exc())
                    negatives_found = False

            # @modified 20200425 - Feature #3508: ionosphere.untrainable_metrics
            # return True, ensemble, timeseries[-1][1]
            return True, ensemble, timeseries[-1][1], negatives_found

        # @modified 20200425 - Feature #3508: ionosphere.untrainable_metrics
        # return False, ensemble, timeseries[-1][1]
        return False, ensemble, timeseries[-1][1], negatives_found
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        # @modified 20200425 - Feature #3508: ionosphere.untrainable_metrics
        # return False, [], 1
        return False, [], 1, negatives_found
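
negatives_present is only evaluated once the ensemble is anomalous, to flag
series containing negative values, which Ionosphere treats as untrainable. A
hedged sketch of what such a check might look like; the actual implementation
in the Skyline source may differ:

def negatives_present_sketch(timeseries):
    """Return True if any datapoint in the series is negative."""
    return any(value < 0 for _, value in timeseries)

print(negatives_present_sketch([(1, 5), (2, -1), (3, 7)]))  # True
print(negatives_present_sketch([(1, 5), (2, 0), (3, 7)]))   # False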
Example 6
def run_selected_algorithm(timeseries, metric_name):
    """
    Filter timeseries and run selected algorithm.
    """
    # Get rid of short series
    if len(timeseries) < MIN_TOLERABLE_LENGTH:
        raise TooShort()

    # Get rid of stale series
    if time() - timeseries[-1][0] > STALE_PERIOD:
        raise Stale()

    # Get rid of boring series
    if len(set(item[1] for item in
               timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
        raise Boring()

    # RUN_OPTIMIZED_WORKFLOW - replaces the original ensemble method:
    # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
    # which runs all timeseries through all ALGORITHMS
    final_ensemble = []
    number_of_algorithms_triggered = 0
    number_of_algorithms_run = 0
    number_of_algorithms = len(ALGORITHMS)
    maximum_false_count = number_of_algorithms - CONSENSUS + 1
    # logger.info('the maximum_false_count is %s, above which CONSENSUS cannot be achieved' % (str(maximum_false_count)))
    consensus_possible = True
    # DEVELOPMENT: this is for a development version of analyzer only
    if skyline_app == 'analyzer_dev':
        time_all_algorithms = True
    else:
        time_all_algorithms = False

    algorithm_tmp_file_prefix = '%s/%s.' % (SKYLINE_TMP_DIR, skyline_app)

    for algorithm in ALGORITHMS:
        if consensus_possible:

            if send_algorithm_run_metrics:
                algorithm_count_file = '%s%s.count' % (
                    algorithm_tmp_file_prefix, algorithm)
                algorithm_timings_file = '%s%s.timings' % (
                    algorithm_tmp_file_prefix, algorithm)

            run_algorithm = []
            run_algorithm.append(algorithm)
            number_of_algorithms_run += 1
            if send_algorithm_run_metrics:
                start = timer()
            try:
                algorithm_result = [
                    globals()[test_algorithm](timeseries)
                    for test_algorithm in run_algorithm
                ]
            except:
                # logger.error('%s failed' % (algorithm))
                algorithm_result = [None]

            if send_algorithm_run_metrics:
                end = timer()
                with open(algorithm_count_file, 'a') as f:
                    f.write('1\n')
                with open(algorithm_timings_file, 'a') as f:
                    f.write('%.6f\n' % (end - start))
        else:
            algorithm_result = [False]
            # logger.info('CONSENSUS NOT ACHIEVABLE - skipping %s' % (str(algorithm)))

        if algorithm_result.count(True) == 1:
            result = True
            number_of_algorithms_triggered += 1
            # logger.info('algorithm %s triggered' % (str(algorithm)))
        elif algorithm_result.count(False) == 1:
            result = False
        elif algorithm_result.count(None) == 1:
            result = None
        else:
            result = False

        final_ensemble.append(result)

        if not RUN_OPTIMIZED_WORKFLOW:
            continue

        if time_all_algorithms:
            continue

        if ENABLE_ALL_ALGORITHMS_RUN_METRICS:
            continue

        # true_count = final_ensemble.count(True)
        # false_count = final_ensemble.count(False)
        # logger.info('current false_count %s' % (str(false_count)))

        if final_ensemble.count(False) >= maximum_false_count:
            consensus_possible = False
            # logger.info('CONSENSUS cannot be reached as %s algorithms have already not been triggered' % (str(false_count)))
            # skip_algorithms_count = number_of_algorithms - number_of_algorithms_run
            # logger.info('skipping %s algorithms' % (str(skip_algorithms_count)))

    # logger.info('final_ensemble: %s' % (str(final_ensemble)))

    try:
        # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
        ensemble = final_ensemble

        threshold = len(ensemble) - CONSENSUS
        if ensemble.count(False) <= threshold:
            if ENABLE_SECOND_ORDER:
                if is_anomalously_anomalous(metric_name, ensemble,
                                            timeseries[-1][1]):
                    return True, ensemble, timeseries[-1][1]
            else:
                return True, ensemble, timeseries[-1][1]

        return False, ensemble, timeseries[-1][1]
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        return False, [], 1
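
With ENABLE_SECOND_ORDER set, an ensemble anomaly only stands if
is_anomalously_anomalous agrees, i.e. if anomalies on this metric are not
firing so often that another one is unremarkable. A much-simplified,
illustrative sketch of that interval-based idea (the real implementation
persists trigger history in Redis and its statistics may differ):

import numpy as np

def intervals_anomalous(trigger_timestamps):
    """Flag if the latest gap between anomalies deviates from the mean gap."""
    intervals = np.diff(trigger_timestamps)
    if len(intervals) < 2:
        return True  # not enough history, treat the anomaly as notable
    mean, stdev = intervals[:-1].mean(), intervals[:-1].std()
    if stdev == 0:
        return intervals[-1] != mean
    return abs(intervals[-1] - mean) > 3 * stdev

# anomalies roughly every 60s, then a 600s gap: the gap itself is anomalous
print(intervals_anomalous([0, 55, 120, 175, 240, 840]))  # True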
Example 7
def run_selected_algorithm(timeseries, metric_name, airgapped_metrics,
                           airgapped_metrics_filled, run_negatives_present,
                           check_for_airgaps_only):
    """
    Filter timeseries and run selected algorithm.
    """

    # @added 20180807 - Feature #2492: alert on stale metrics
    # Determine if a metric has stopped sending data and if so add to the
    # analyzer.alert_on_stale_metrics Redis set
    add_to_alert_on_stale_metrics = False
    if ALERT_ON_STALE_METRICS:
        # @modified 20180816 - Feature #2492: alert on stale metrics
        # Added try and except to prevent some errors that are encountered between
        # 00:14 and 00:17 on some days
        # Traceback (most recent call last):
        # File "/opt/skyline/github/skyline/skyline/analyzer/analyzer.py", line 394, in spin_process
        # anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)
        # File "/opt/skyline/github/skyline/skyline/analyzer/algorithms.py", line 530, in run_selected_algorithm
        # if int(time()) - int(timeseries[-1][0]) >= ALERT_ON_STALE_PERIOD:
        # IndexError: list index out of range
        try:
            if int(time()) - int(timeseries[-1][0]) >= ALERT_ON_STALE_PERIOD:
                add_to_alert_on_stale_metrics = True
        except:
            # @modified 20180816 - Feature #2492: alert on stale metrics
            add_to_alert_on_stale_metrics = False
        try:
            if int(time()) - int(timeseries[-1][0]) >= STALE_PERIOD:
                add_to_alert_on_stale_metrics = False
        except:
            add_to_alert_on_stale_metrics = False

        if add_to_alert_on_stale_metrics:
            try:
                # @added 20200505 - Feature #3504: Handle airgaps in batch metrics
                # Use get_redis_conn
                from skyline_functions import get_redis_conn
                redis_conn = get_redis_conn(skyline_app)
                redis_conn.sadd('analyzer.alert_on_stale_metrics', metric_name)
            except:
                pass

    # @added 20200505 - Feature #3504: Handle airgaps in batch metrics
    # Check to see if this is a batch processing metric that has been sent
    # through Analyzer to check for airgaps only and if so do not check the
    # timeseries for exceptions
    check_for_timeseries_exceptions = True
    check_airgap_only = None
    if BATCH_PROCESSING and check_for_airgaps_only:
        check_airgap_only_key = 'analyzer.check_airgap_only.%s' % metric_name
        try:
            if not add_to_alert_on_stale_metrics:
                # @added 20200505 - Feature #3504: Handle airgaps in batch metrics
                # Use get_redis_conn
                from skyline_functions import get_redis_conn
                redis_conn = get_redis_conn(skyline_app)
            check_airgap_only = redis_conn.get(check_airgap_only_key)
        except:
            check_airgap_only = None
        if check_airgap_only:
            check_for_timeseries_exceptions = False

    # @modified 20200505 - Feature #3504: Handle airgaps in batch metrics
    # Wrapped in check_for_timeseries_exceptions as if it is a check_airgap_only
    # metric then the time series should not be checked for exceptions
    if check_for_timeseries_exceptions:
        # Get rid of short series
        if len(timeseries) < MIN_TOLERABLE_LENGTH:
            raise TooShort()

        # Get rid of stale series
        if time() - timeseries[-1][0] > STALE_PERIOD:
            raise Stale()

        # Get rid of boring series
        if len(set(item[1] for item in
                   timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
            raise Boring()

    # @added 20200423 - Feature #3508: ionosphere.untrainable_metrics
    # Added run_negatives_present
    negatives_found = False

    # @added 20200117 - Feature #3400: Identify air gaps in the metric data
    # @modified 20200214 - Bug #3448: Repeated airgapped_metrics
    #                      Feature #3400: Identify air gaps in the metric data
    # if IDENTIFY_AIRGAPS:
    if IDENTIFY_AIRGAPS or IDENTIFY_UNORDERED_TIMESERIES:
        # airgaps = identify_airgaps(metric_name, timeseries, airgapped_metrics)
        # if airgaps:
        process_metric = True
        if IDENTIFY_AIRGAPS:
            if CHECK_AIRGAPS:
                process_metric = False

                # @added 20200423 - Feature #3504: Handle airgaps in batch metrics
                #                   Feature #3400: Identify air gaps in the metric data
                # Replaced code block below to determine if a metric is a check
                # with a skyline_functions definition of that block as
                # the check_metric_for_airgaps function
                check_metric_for_airgaps = False
                try:
                    check_metric_for_airgaps = is_check_airgap_metric(
                        metric_name)
                except:
                    check_metric_for_airgaps = False
                    try:
                        logger.error(
                            'failed to determine if %s is an airgap metric: %s'
                            % (str(metric_name), traceback.format_exc()))
                    except:
                        logger.error(
                            'failed to determine if the metric is an airgap metric'
                        )
                if check_metric_for_airgaps:
                    process_metric = True
        else:
            # If IDENTIFY_AIRGAPS is not enabled and
            # IDENTIFY_UNORDERED_TIMESERIES is enabled process the metric
            if IDENTIFY_UNORDERED_TIMESERIES:
                process_metric = True
        airgaps = None
        unordered_timeseries = False
        if process_metric:
            # @modified 20200501 - Feature #3400: Identify air gaps in the metric data
            # Added airgapped_metrics_filled
            # airgaps, unordered_timeseries = identify_airgaps(metric_name, timeseries, airgapped_metrics)
            airgaps, unordered_timeseries = identify_airgaps(
                metric_name, timeseries, airgapped_metrics,
                airgapped_metrics_filled)
        if airgaps or unordered_timeseries:
            try:
                redis_conn.ping()
            except:
                # @added 20200505 - Feature #3504: Handle airgaps in batch metrics
                # Use get_redis_conn
                from skyline_functions import get_redis_conn
                redis_conn = get_redis_conn(skyline_app)
        if airgaps:
            for i in airgaps:
                try:
                    redis_conn.sadd('analyzer.airgapped_metrics', str(i))
                    logger.info('adding airgap %s' % str(i))
                    # TODO: learn_airgapped_metrics
                except:
                    pass
            del airgaps

        # @added 20200214 - Bug #3448: Repeated airgapped_metrics
        #                   Feature #3400: Identify air gaps in the metric data
        # Also add unordered time series to the analyzer.unordered_timeseries
        # Redis set
        if unordered_timeseries:
            try:
                redis_conn.sadd('analyzer.unordered_timeseries', metric_name)
                del unordered_timeseries
            except:
                pass

    # @added 20200423 - Feature #3504: Handle airgaps in batch metrics
    #                   Feature #3480: batch_processing
    #                   Feature #3486: analyzer_batch
    #                   Feature #3400: Identify air gaps in the metric data
    # Check to see if this is a batch processing metric that has been sent to
    # analyzer_batch for processing but sent through Analyzer to check for
    # airgaps only and if so return as it should not be run through algorithms
    if BATCH_PROCESSING:
        if check_airgap_only:
            try:
                redis_conn.delete(check_airgap_only_key)
            except:
                try:
                    logger.error(
                        'failed to delete Redis key %s: %s' %
                        (str(check_airgap_only_key), traceback.format_exc()))
                except:
                    logger.error(
                        'failed to log the failure to delete the check_airgap_only_key Redis key'
                    )
            # @modified 20200430 - Feature #3480: batch_processing
            # Tidy up and reduce logging, only log if debug enabled
            if BATCH_PROCESSING_DEBUG:
                logger.info(
                    'algorithms :: batch processing - batch metric %s checked for airgaps only, not analysing'
                    % (str(metric_name)))

            # TODO: the only worry here is that this metric then gets added to
            # the not_anomalous Redis set?  Not sure if that is a problem, I do
            # not think it is.  Unless it is in the end of anomaly_end_timestamp
            # context?
            # @modified 20200424 - Feature #3508: ionosphere.untrainable_metrics
            # Added negatives_found
            return False, [], 1, negatives_found

    # RUN_OPTIMIZED_WORKFLOW - replaces the original ensemble method:
    # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
    # which runs all timeseries through all ALGORITHMS
    final_ensemble = []
    number_of_algorithms_triggered = 0
    number_of_algorithms_run = 0
    number_of_algorithms = len(ALGORITHMS)
    maximum_false_count = number_of_algorithms - CONSENSUS + 1
    # logger.info('the maximum_false_count is %s, above which CONSENSUS cannot be achieved' % (str(maximum_false_count)))
    consensus_possible = True
    # DEVELOPMENT: this is for a development version of analyzer only
    if skyline_app == 'analyzer_dev':
        time_all_algorithms = True
    else:
        time_all_algorithms = False

    algorithm_tmp_file_prefix = '%s/%s.' % (SKYLINE_TMP_DIR, skyline_app)

    for algorithm in ALGORITHMS:
        if consensus_possible:

            if send_algorithm_run_metrics:
                algorithm_count_file = '%s%s.count' % (
                    algorithm_tmp_file_prefix, algorithm)
                algorithm_timings_file = '%s%s.timings' % (
                    algorithm_tmp_file_prefix, algorithm)

            run_algorithm = []
            run_algorithm.append(algorithm)
            number_of_algorithms_run += 1
            if send_algorithm_run_metrics:
                start = timer()
            try:
                algorithm_result = [
                    globals()[test_algorithm](timeseries)
                    for test_algorithm in run_algorithm
                ]
            except:
                # logger.error('%s failed' % (algorithm))
                algorithm_result = [None]

            if send_algorithm_run_metrics:
                end = timer()
                with open(algorithm_count_file, 'a') as f:
                    f.write('1\n')
                with open(algorithm_timings_file, 'a') as f:
                    f.write('%.6f\n' % (end - start))
        else:
            algorithm_result = [False]
            # logger.info('CONSENSUS NOT ACHIEVABLE - skipping %s' % (str(algorithm)))

        if algorithm_result.count(True) == 1:
            result = True
            number_of_algorithms_triggered += 1
            # logger.info('algorithm %s triggered' % (str(algorithm)))
        elif algorithm_result.count(False) == 1:
            result = False
        elif algorithm_result.count(None) == 1:
            result = None
        else:
            result = False

        final_ensemble.append(result)

        if not RUN_OPTIMIZED_WORKFLOW:
            continue

        if time_all_algorithms:
            continue

        if ENABLE_ALL_ALGORITHMS_RUN_METRICS:
            continue

        # true_count = final_ensemble.count(True)
        # false_count = final_ensemble.count(False)
        # logger.info('current false_count %s' % (str(false_count)))

        if final_ensemble.count(False) >= maximum_false_count:
            consensus_possible = False
            # logger.info('CONSENSUS cannot be reached as %s algorithms have already not been triggered' % (str(false_count)))
            # skip_algorithms_count = number_of_algorithms - number_of_algorithms_run
            # logger.info('skipping %s algorithms' % (str(skip_algorithms_count)))

    # logger.info('final_ensemble: %s' % (str(final_ensemble)))

    try:
        # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
        ensemble = final_ensemble

        threshold = len(ensemble) - CONSENSUS
        if ensemble.count(False) <= threshold:

            # @added 20200425 - Feature #3508: ionosphere.untrainable_metrics
            # Only run a negatives_present check if it is anomalous, there
            # is no need to check unless it is related to an anomaly
            if run_negatives_present:
                try:
                    negatives_found = negatives_present(timeseries)
                except:
                    logger.error('Algorithm error: negatives_present :: %s' %
                                 traceback.format_exc())
                    negatives_found = False

            if ENABLE_SECOND_ORDER:
                if is_anomalously_anomalous(metric_name, ensemble,
                                            timeseries[-1][1]):
                    # @modified 20200423 - Feature #3508: ionosphere.untrainable_metrics
                    # Added negatives_found
                    return True, ensemble, timeseries[-1][1], negatives_found
            else:
                return True, ensemble, timeseries[-1][1], negatives_found

        # @modified 20200423 - Feature #3508: ionosphere.untrainable_metrics
        # Added negatives_found
        return False, ensemble, timeseries[-1][1], negatives_found
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        # @modified 20200423 - Feature #3508: ionosphere.untrainable_metrics
        # Added negatives_found
        return False, [], 1, negatives_found
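
identify_airgaps returns both any airgaps found in the timestamps and whether
the series is unordered, and the results are pushed into the
analyzer.airgapped_metrics and analyzer.unordered_timeseries Redis sets above.
A hedged sketch of the core gap detection, assuming a fixed expected
resolution (the real identify_airgaps also honours already-known and
already-filled airgaps):

def identify_airgaps_sketch(timeseries, expected_resolution=60):
    """Return (airgaps, unordered): gaps wider than expected_resolution as
    (start, end) timestamp pairs, plus whether timestamps ever go backwards."""
    airgaps = []
    unordered = False
    for (ts1, _), (ts2, _) in zip(timeseries, timeseries[1:]):
        if ts2 < ts1:
            unordered = True
        elif ts2 - ts1 > expected_resolution:
            airgaps.append((ts1, ts2))
    return airgaps, unordered

series = [(0, 1), (60, 1), (300, 1), (360, 1)]
print(identify_airgaps_sketch(series))  # ([(60, 300)], False)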
def run_selected_algorithm(timeseries, metric_name, metric_expiration_time,
                           metric_min_average, metric_min_average_seconds,
                           metric_trigger, alert_threshold, metric_alerters,
                           autoaggregate, autoaggregate_value, algorithm):
    """
    Filter timeseries and run selected algorithm.
    """

    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug :: assigning in algorithms.py - %s, %s' %
                    (metric_name, algorithm))

    # Get rid of short series
    if len(timeseries) < MIN_TOLERABLE_LENGTH:
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: TooShort - %s, %s' %
                        (metric_name, algorithm))
        raise TooShort()

    # Get rid of stale series
    if time() - timeseries[-1][0] > STALE_PERIOD:
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: Stale - %s, %s' % (metric_name, algorithm))
        raise Stale()

    # Get rid of boring series
    if len(set(item[1] for item in
               timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: Boring - %s, %s' % (metric_name, algorithm))
        raise Boring()

    if autoaggregate:
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: auto aggregating %s for %s' %
                        (metric_name, algorithm))
        try:
            agg_timeseries = autoaggregate_ts(timeseries, autoaggregate_value)
            if ENABLE_BOUNDARY_DEBUG:
                logger.info(
                    'debug :: aggregated_timeseries returned %s for %s' %
                    (metric_name, algorithm))
        except Exception as e:
            agg_timeseries = []
            if ENABLE_BOUNDARY_DEBUG:
                logger.info(
                    'debug error - autoaggregate exception %s for %s' %
                    (metric_name, algorithm))
                logger.error('Algorithm error: %s' % traceback.format_exc())
                logger.error('error: %s' % e)

        if len(agg_timeseries) > 10:
            timeseries = agg_timeseries
        else:
            if ENABLE_BOUNDARY_DEBUG:
                logger.info('debug :: TooShort - %s, %s' %
                            (metric_name, algorithm))
            raise TooShort()

    if len(timeseries) < 10:
        if ENABLE_BOUNDARY_DEBUG:
            logger.info(
                'debug :: timeseries too short - %s - timeseries length - %s' %
                (metric_name, str(len(timeseries))))
        raise TooShort()

    try:
        ensemble = [
            globals()[algorithm](timeseries, metric_name,
                                 metric_expiration_time, metric_min_average,
                                 metric_min_average_seconds, metric_trigger)
        ]
        if ensemble.count(True) == 1:
            if ENABLE_BOUNDARY_DEBUG:
                logger.info(
                    'debug :: anomalous datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s'
                    % (str(timeseries[-1][1]), str(metric_name),
                       str(metric_expiration_time), str(metric_min_average),
                       str(metric_min_average_seconds), str(metric_trigger),
                       str(alert_threshold), str(metric_alerters),
                       str(algorithm)))
            return (True, ensemble, timeseries[-1][1], metric_name,
                    metric_expiration_time, metric_min_average,
                    metric_min_average_seconds, metric_trigger,
                    alert_threshold, metric_alerters, algorithm)
        else:
            if ENABLE_BOUNDARY_DEBUG:
                logger.info(
                    'debug :: not anomalous datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s'
                    % (str(timeseries[-1][1]), str(metric_name),
                       str(metric_expiration_time), str(metric_min_average),
                       str(metric_min_average_seconds), str(metric_trigger),
                       str(alert_threshold), str(metric_alerters),
                       str(algorithm)))
            return (False, ensemble, timeseries[-1][1], metric_name,
                    metric_expiration_time, metric_min_average,
                    metric_min_average_seconds, metric_trigger,
                    alert_threshold, metric_alerters, algorithm)
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        return (False, [], 1, metric_name, metric_expiration_time,
                metric_min_average, metric_min_average_seconds,
                metric_trigger, alert_threshold, metric_alerters, algorithm)