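# The thresholds JSON passed on the command line (loaded below via
# load_metrics_thresholds) maps metric names to maximum allowed values.
# A plain key such as "CountLineCode" is checked per entity; a key with a
# single stats prefix ("AVG:", "MEDIAN:", "STDEV:", per STATS_LAMBDAS) is
# checked against a statistic computed over the whole population instead.
# A hypothetical example, using an illustrative metric name:
#
#     {"CountLineCode": 500, "AVG:CountLineCode": 50, "STDEV:CountLineCode": 20}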
def process_generic_metrics(db, cmdline_arguments, jsonCmdLineParam, entityQuery,
                            lambda_to_print, regex_str_ignore_item, scope_name):
    regex_str_traverse_files = cmdline_arguments.get("--regexTraverseFiles", "*")
    regex_ignore_files = cmdline_arguments.get("--regexIgnoreFiles", None)
    max_metrics_json = cmdline_arguments[jsonCmdLineParam]
    max_values_allowed_by_metric = {}
    violation_count = 0
    entities = db.ents(entityQuery)
    skipLibraries = cmdline_arguments["--skipLibs"] == "true"
    skip_zeroes = cmdline_arguments.get("--skipZeroes", False)
    verbose = cmdline_arguments["--verbose"]
    save_histograms = cmdline_arguments["--histograms"]
    try:
        max_values_allowed_by_metric = load_metrics_thresholds(max_metrics_json)
    except Exception as ex:
        print("SEVERE WARNING loading json: %s" % ex)
        max_values_allowed_by_metric = {}
    if not isinstance(max_values_allowed_by_metric, dict):
        max_values_allowed_by_metric = {}
    if len(max_values_allowed_by_metric) == 0:  # no metrics passed in
        print("*** EMPTY Metrics. JSON error? (%s)" % max_metrics_json)
        return [0, {}, {}]
    highest_values_found_by_metric = {}
    last_processed_metric = ""  # fix for #21, to reuse values
    last_all_values = []  # fix for #21, to reuse values
    last_max_value_found = -1
    stats_cache = {}  # fix for #22 - use cached values for stats
    sorted_metrics = sorted(max_values_allowed_by_metric.keys(),
                            key=metric_name_for_sorting)
    for metric in sorted_metrics:
        max_allowed_value = max_values_allowed_by_metric[metric]
        all_values = []  # we may need to collect all values, if we are going to save a histogram
        lambda_stats = None
        if metric.count(':') == 1:  # fix for #42 - can have only one ':'
            lambda_name, adjusted_metric = metric.split(":")
            lambda_stats = STATS_LAMBDAS.get(lambda_name.upper().strip(), None)
        if lambda_stats is None:  # regular metric, not a stats one
            max_value_found = -1
            entity_with_max_value_found = None
            has_stats_counterpart = (":%s" % metric) in "".join(sorted_metrics)
            for entity, container_file, metric, metric_value in stream_of_entity_with_metric(
                    entities, metric, verbose, skipLibraries, regex_str_ignore_item,
                    regex_str_traverse_files, regex_ignore_files, skip_zeroes=skip_zeroes):
                if save_histograms or has_stats_counterpart:  # fix for #22 - cache values for stats
                    all_values.append(metric_value)
                if metric_value > highest_values_found_by_metric.get(metric, -1):  # even a zero we want to tag as a max
                    highest_values_found_by_metric[metric] = metric_value
                max_allowed = max_values_allowed_by_metric[metric]
                if metric_value > max_allowed:  # we found a violation
                    violation_count += 1
                    lambda_to_print(entity, metric, metric_value,
                                    container_file=container_file)
                if metric_value > max_value_found:  # max found, which may or may not be a violator
                    max_value_found = metric_value
                    entity_with_max_value_found = entity
            if entity_with_max_value_found is not None:
                if bool(cmdline_arguments["--showHighest"]):
                    print("...........................................")
                    kind = "violator"
                    if max_value_found <= max_allowed_value:
                        kind = "non violator"
                    print("INFO: HIGHEST %s %s found (violation threshold is %s):\t" %
                          (metric, kind, max_allowed_value), end="")
                    # prints the max found, which may or may not be a violator
                    lambda_to_print(entity_with_max_value_found, metric, max_value_found,
                                    container_file=container_file)
                    print("...........................................")
            last_processed_metric = metric  # fix for #21, to reuse values
            last_all_values = all_values  # fix for #21, to reuse values
            last_max_value_found = max_value_found
        else:  # stats metric: compute over the whole population
            def metric_values():
                # generator of a stream of float values, to be consumed by the stats functions
                for entity, container_file, metric, metric_value in stream_of_entity_with_metric(
                        entities, adjusted_metric, verbose, skipLibraries, regex_str_ignore_item,
                        regex_str_traverse_files, regex_ignore_files, skip_zeroes=skip_zeroes):
                    yield metric_value

            if adjusted_metric == last_processed_metric:  # fix for #21 - reuse values; thanks to sorting, the pure metric must have come just before
                all_values = last_all_values
                max_value_found = last_max_value_found
            else:
                all_values = [value for value in metric_values()]
                if save_histograms:
                    max_value_found = max(all_values) if len(all_values) > 0 else 0
                last_max_value_found = max_value_found  # fix for #21, same as above
                last_processed_metric = adjusted_metric  # fix for #21, in case only stats functions are used, not the pure one
                last_all_values = all_values  # fix for #21, same as above
            # fix for #22 - use the cached value for stats; the key is normalized
            # the same way as the STATS_LAMBDAS lookup above
            stats_value = stats_cache.get(adjusted_metric, {}).get(
                lambda_name.upper().strip(), None)
            if stats_value is None:
                try:
                    stats_value = lambda_stats(all_values)
                except statistics.StatisticsError as se:
                    print("ERROR in %s: %s" % (metric, se))
                    continue
            highest_values_found_by_metric[metric] = stats_value
            if stats_value > max_allowed_value:  # we found a violation
                violation_count += 1
                lambda_to_print(DummyEntity(), metric, stats_value)
            else:
                if bool(cmdline_arguments["--showHighest"]):
                    print("...........................................")
                    print("INFO(STATS): %s = %s (violation threshold is %s):" %
                          (metric, stats_value, max_allowed_value))
                    print("...........................................")
        if save_histograms and len(all_values) > 0 and lambda_stats is None:
            output_dir = cmdline_arguments["--outputDir"]
            file_prefix = "%s%s%s" % (output_dir, os.sep, os.path.split(db.name())[-1])
            file_name, mean, median, pstdev = save_histogram(
                bool(cmdline_arguments["--showMeanMedian"]),
                bool(cmdline_arguments["--logarithmic"]),
                file_prefix, max_value_found, metric, all_values, scope_name)
            if mean is not None:
                stats_cache[metric] = {"AVG": mean, "MEDIAN": median,
                                       "STDEV": pstdev}  # fix for #22 - cache values for stats
            if verbose:
                print("Saved %s" % file_name)
    return [violation_count, highest_values_found_by_metric,
            max_values_allowed_by_metric]
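

# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of the tool's API): a minimal model of
# the "PREFIX:metric" dispatch handled above. The real STATS_LAMBDAS is
# defined elsewhere in this module; the AVG/MEDIAN/STDEV names below are
# inferred from the stats_cache keys used in process_generic_metrics.
import statistics

_STATS_LAMBDAS_SKETCH = {
    "AVG": statistics.mean,
    "MEDIAN": statistics.median,
    "STDEV": statistics.pstdev,
}


def _split_stats_metric_sketch(metric):
    """Split 'AVG:CountLineCode' into (statistics.mean, 'CountLineCode').

    Returns (None, metric) when there is no single-':' stats prefix,
    mirroring the lambda_stats-is-None branch above (fix for #42).
    """
    if metric.count(':') == 1:
        lambda_name, adjusted_metric = metric.split(":")
        return (_STATS_LAMBDAS_SKETCH.get(lambda_name.upper().strip(), None),
                adjusted_metric)
    return None, metric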
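

# Illustrative sketch only: why the fix-for-#21 reuse above works. Sorting the
# metric names so that a pure metric (e.g. "CountLineCode") is visited just
# before its stats variants ("AVG:CountLineCode", ...) lets the stats branch
# reuse last_all_values instead of re-streaming every entity. The real
# metric_name_for_sorting is defined elsewhere in this module; this
# hypothetical key shows one way such an ordering could be obtained.
def _metric_name_for_sorting_sketch(metric):
    if metric.count(':') == 1:
        prefix, name = metric.split(":")
        return (name, 1, prefix)  # stats variants sort after the pure metric
    return (metric, 0, "")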