import six

# `helpers` is assumed to be this project's helper module providing
# interpolate() and create_agg_metric(); the exact import path is not part
# of this excerpt.
import helpers


def combine(all_metrics, combine_fn, combine_metric_name, nb_of_metrics):
    """
    Combine the given metrics of this RDD into one.

    :type all_metrics: pyspark.resultiterable.ResultIterable
    :param all_metrics: List containing the metrics.
    :type combine_fn: (dict[str, float]) -> float
    :param combine_fn: Combiner mapping one value per metric name to the
        combined value.
    :type combine_metric_name: str
    :param combine_metric_name: Name of the new metric.
    :type nb_of_metrics: int
    :param nb_of_metrics: The number of metrics expected.
    """
    # Separate metrics based on name
    separated_metrics = {}  # type: dict[str, list[dict]]
    dims = None
    for el in all_metrics:
        key = el["metric"]["name"]
        if dims is None:
            dims = el["metric"]["dimensions"]
        if key not in separated_metrics:
            separated_metrics[key] = [el]
        else:
            separated_metrics[key].append(el)

    # Bail out if some expected metric is missing.
    if len(separated_metrics.keys()) != nb_of_metrics:
        return []

    # Now a list[(str, list[dict])], shortest series first: the shortest
    # series drives the timestamps of the result.
    separated_metrics = sorted(list(six.iteritems(separated_metrics)),
                               key=lambda x: len(x[1]))

    # Sort each metric by timestamp
    for metric in separated_metrics:
        metric[1].sort(key=lambda v: v["metric"]["timestamp"])

    temp_values = []
    all_timestamp = [[x["metric"]["timestamp"] for x in l[1]]
                     for l in separated_metrics]
    for index in range(0, len(separated_metrics[0][1])):
        current_env = {
            separated_metrics[0][0]:
                separated_metrics[0][1][index]["metric"]["value"]
        }
        timestamp = all_timestamp[0][index]
        # Interpolate every other metric at this timestamp.
        for metric_index in range(1, len(separated_metrics)):
            metric_prop = separated_metrics[metric_index]
            metric_name = metric_prop[0]
            current_env[metric_name] = helpers.interpolate(
                timestamp,
                metric_prop[1],
                all_timestamp[metric_index]
            )
        temp_values.append(current_env)

    new_values = [combine_fn(env) for env in temp_values]

    new_metrics = [
        helpers.create_agg_metric(combine_metric_name, {}, dims, tsmp, val)
        for val, tsmp in zip(new_values, all_timestamp[0])
    ]
    return new_metrics
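# Usage sketch for `combine` (illustrative only: the metric payloads and
# names below are hypothetical, and `helpers` is assumed to behave as used
# above). The combiner receives one dict per timestamp of the shortest
# series, mapping each metric name to its (possibly interpolated) value.
def _example_combine():
    grouped = [
        {"metric": {"name": "mem.used", "dimensions": {"host": "h1"},
                    "timestamp": t, "value": v}, "meta": {}}
        for t, v in [(0, 1.0), (10, 2.0)]
    ] + [
        {"metric": {"name": "mem.total", "dimensions": {"host": "h1"},
                    "timestamp": t, "value": v}, "meta": {}}
        for t, v in [(0, 4.0), (10, 4.0)]
    ]
    # Produces two "mem.usage_ratio" metrics: 0.25 at t=0 and 0.5 at t=10.
    return combine(grouped,
                   lambda env: env["mem.used"] / env["mem.total"],
                   "mem.usage_ratio",
                   nb_of_metrics=2)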
def aggregate(all_metrics, reducer, suffix):
    """
    Aggregate values produced by different providers together.

    The metric name is assumed to be the same for all providers.

    :type all_metrics: list[dict]
    :param all_metrics: Values to aggregate mapping to a specific metric name.
    :type reducer: ((float, float) -> float, (float, float, float) -> float)
    :param reducer: Combine the metrics values together.
    :type suffix: str
    :param suffix: Suffix to append to the metric name in its combined form.
    """
    # Collect metrics separately, keyed by their dimensions.
    separated_metrics = {}  # type: dict[frozenset, list[dict]]
    for el in all_metrics:
        key = frozenset(el["metric"]["dimensions"].items())
        if key not in separated_metrics:
            separated_metrics[key] = [el]
        else:
            separated_metrics[key].append(el)

    # Collect all dimensions
    dims = {}
    for metric_dims in separated_metrics.keys():
        for prop, val in six.iteritems(dict(metric_dims)):
            if prop in dims:
                dims[prop].add(val)
            else:
                dims[prop] = {val}

    # Sort each metric by timestamp
    for _, metric in six.iteritems(separated_metrics):
        metric.sort(key=lambda v: v["metric"]["timestamp"])

    # Longest series first: it drives the timestamps of the result.
    separated_metrics = sorted(list(separated_metrics.values()),
                               key=len, reverse=True)

    # Compute the new values
    new_values = []
    all_timestamps = [[x["metric"]["timestamp"] for x in l]
                      for l in separated_metrics]
    metric_count = len(separated_metrics)
    for index in range(0, len(separated_metrics[0])):
        new_value = reducer[0](
            separated_metrics[0][index]["metric"]["value"],
            metric_count)
        new_timestamp = separated_metrics[0][index]["metric"]["timestamp"]
        for metric_index in range(1, metric_count):
            new_value = reducer[1](new_value, helpers.interpolate(
                new_timestamp,
                separated_metrics[metric_index],
                all_timestamps[metric_index]
            ), metric_count)
        new_values.append((new_timestamp, new_value))

    # Aggregate the other details
    metric_name = separated_metrics[0][0]["metric"]["name"] + suffix
    meta = separated_metrics[0][0]["meta"]
    new_metrics = [
        helpers.create_agg_metric(metric_name, meta, dims, val[0], val[1])
        for val in new_values
    ]
    return new_metrics
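# Usage sketch for `aggregate` (illustrative only: the metric payloads are
# hypothetical). The reducer tuple below computes the mean across providers:
# the first callable seeds the accumulator from the longest series, the
# second folds in each interpolated value from the remaining series.
def _example_aggregate_mean():
    metrics = [
        {"metric": {"name": "cpu.load", "dimensions": {"host": h},
                    "timestamp": t, "value": v}, "meta": {}}
        for h, t, v in [("h1", 0, 1.0), ("h1", 10, 3.0),
                        ("h2", 0, 2.0), ("h2", 10, 4.0)]
    ]
    mean_reducer = (lambda v, n: v / n,
                    lambda acc, v, n: acc + v / n)
    # Produces two "cpu.load_avg" metrics: 1.5 at t=0 and 3.5 at t=10.
    return aggregate(metrics, mean_reducer, "_avg")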
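# The helpers used above are not part of this excerpt. Inferred from the
# call sites, helpers.interpolate(timestamp, metrics, timestamps) estimates
# a value at `timestamp` from a timestamp-sorted metric list, and
# helpers.create_agg_metric(name, meta, dimensions, timestamp, value) builds
# a metric envelope of the same shape consumed above. The function below is
# a hypothetical stand-in for interpolate, assuming linear interpolation;
# the project's real implementation may use a different scheme.
import bisect


def _interpolate_linear(timestamp, metrics, timestamps):
    """Hypothetical linear stand-in for helpers.interpolate."""
    i = bisect.bisect_left(timestamps, timestamp)
    # Clamp to the series' endpoints outside its range.
    if i == 0:
        return metrics[0]["metric"]["value"]
    if i >= len(timestamps):
        return metrics[-1]["metric"]["value"]
    t0, t1 = timestamps[i - 1], timestamps[i]
    v0 = metrics[i - 1]["metric"]["value"]
    v1 = metrics[i]["metric"]["value"]
    if t1 == t0:
        return v0
    return v0 + (v1 - v0) * (timestamp - t0) / (t1 - t0)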