import six

# Assumed import path for the helper module providing create_agg_metric()
# and interpolate(); adjust to match the surrounding package layout.
import monasca_analytics.ldp.monasca.helpers as helpers


def derivative(metric_values):
    """
    Compute the derivative of the given metric time series.

    :type metric_values: pyspark.resultiterable.ResultIterable[dict]
    :param metric_values: The list of metric values.
    :return: Returns the derivative of the provided metric.
    """
    if len(metric_values.data) < 2:
        return []

    metric_name = metric_values.data[0]["metric"]["name"] + "_derivative"
    meta = metric_values.data[0]["meta"]
    dims = metric_values.data[0]["metric"]["dimensions"]

    # Collect timestamps and values as lists (not lazy maps) so they can
    # be indexed and iterated more than once.
    timestamps = [m["metric"]["timestamp"] for m in metric_values]
    all_values = [m["metric"]["value"] for m in metric_values]

    # Sort values by timestamp
    all_values = [
        y for (_, y) in sorted(zip(timestamps, all_values),
                               key=lambda x: x[0])
    ]
    timestamps = sorted(timestamps)

    # Remove duplicate timestamps (a zero time delta would divide by zero)
    last_timestamp = timestamps[0]
    tmp_all_values = [all_values[0]]
    tmp_timestamps = [last_timestamp]
    for index in range(1, len(timestamps)):
        if timestamps[index] == last_timestamp:
            continue
        last_timestamp = timestamps[index]
        tmp_all_values.append(all_values[index])
        tmp_timestamps.append(last_timestamp)
    all_values = tmp_all_values
    timestamps = tmp_timestamps
    if len(all_values) < 2:
        return []

    # Central differences for interior points, one-sided differences at
    # the boundaries.
    n = len(all_values) - 1
    new_values = [
        float(all_values[1] - all_values[0]) /
        float(timestamps[1] - timestamps[0])
    ]
    for index in range(1, n):
        new_values.append(
            float(all_values[index + 1] - all_values[index - 1]) /
            float(timestamps[index + 1] - timestamps[index - 1]))
    new_values.append(
        float(all_values[n] - all_values[n - 1]) /
        float(timestamps[n] - timestamps[n - 1]))

    new_metrics = [
        helpers.create_agg_metric(metric_name, meta, dims, tmst, val)
        for val, tmst in zip(new_values, timestamps)
    ]
    return new_metrics
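# --- Usage sketch (illustrative only; names below are hypothetical) -------
# derivative() walks the samples sorted by timestamp and emits one point
# per distinct timestamp: a forward difference at the start, central
# differences in the middle, and a backward difference at the end. The
# list-backed stub below stands in for pyspark's ResultIterable, which
# exposes both iteration and a .data list.
class _FakeResultIterable(object):
    """Minimal stand-in for pyspark.resultiterable.ResultIterable."""

    def __init__(self, data):
        self.data = data

    def __iter__(self):
        return iter(self.data)

    def __len__(self):
        return len(self.data)


def _derivative_example():
    samples = _FakeResultIterable([
        {"meta": {}, "metric": {"name": "cpu.user", "dimensions": {},
                                "timestamp": 0, "value": 0.0}},
        {"meta": {}, "metric": {"name": "cpu.user", "dimensions": {},
                                "timestamp": 10, "value": 5.0}},
    ])
    # With two samples, both endpoints get slope (5.0 - 0.0) / (10 - 0)
    # = 0.5, emitted under the name "cpu.user_derivative".
    return derivative(samples)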
def combine(all_metrics, combine_fn, combine_metric_name, nb_of_metrics):
    """
    Combine the given metrics of this RDD into one.

    :type all_metrics: pyspark.resultiterable.ResultIterable
    :param all_metrics: List containing the metrics.
    :param combine_fn: Combiner.
    :type combine_metric_name: str
    :param combine_metric_name: Name of the new metric.
    :type nb_of_metrics: int
    :param nb_of_metrics: The number of metrics expected.
    """
    # Separate metrics based on name
    separated_metrics = {}  # type: dict[str, list[dict]]
    dims = None
    for el in all_metrics:
        key = el["metric"]["name"]
        if dims is None:
            dims = el["metric"]["dimensions"]
        if key not in separated_metrics:
            separated_metrics[key] = [el]
        else:
            separated_metrics[key].append(el)

    if len(separated_metrics) != nb_of_metrics:
        return []

    separated_metrics = sorted(
        list(six.iteritems(separated_metrics)),
        key=lambda x: len(x[1]))  # type: list[(str, list[dict])]

    # Sort each metric by timestamp
    for metric in separated_metrics:
        metric[1].sort(key=lambda v: v["metric"]["timestamp"])

    temp_values = []
    all_timestamp = [[x["metric"]["timestamp"] for x in l[1]]
                     for l in separated_metrics]
    # Walk the sparsest metric and interpolate the others at its timestamps
    for index in range(0, len(separated_metrics[0][1])):
        current_env = {
            separated_metrics[0][0]:
                separated_metrics[0][1][index]["metric"]["value"]
        }
        timestamp = all_timestamp[0][index]
        for metric_index in range(1, len(separated_metrics)):
            metric_prop = separated_metrics[metric_index]
            metric_name = metric_prop[0]
            current_env[metric_name] = helpers.interpolate(
                timestamp,
                metric_prop[1],
                all_timestamp[metric_index])
        temp_values.append(current_env)

    new_values = [combine_fn(env) for env in temp_values]

    new_metrics = [
        helpers.create_agg_metric(combine_metric_name, {}, dims, tsmp, val)
        for val, tsmp in zip(new_values, all_timestamp[0])
    ]
    return new_metrics
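# --- Usage sketch (illustrative only) --------------------------------------
# combine() evaluates combine_fn once per timestamp of the sparsest metric,
# passing a dict that maps each metric name to its (possibly interpolated)
# value at that timestamp. A combiner for the ratio of used to total memory
# could look like this; the metric names are hypothetical.
def _mem_used_ratio(env):
    return env["mem.used_mb"] / float(env["mem.total_mb"])

# combine(grouped_metrics, _mem_used_ratio, "mem.used_ratio", 2) would then
# emit one "mem.used_ratio" point per timestamp of the sparsest input,
# provided exactly the two expected metric names are present.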
def aggregate(all_metrics, reducer, suffix):
    """
    Aggregate values produced by different providers together.

    The metric name is assumed to be the same for all providers.

    :type all_metrics: list[dict]
    :param all_metrics: Values to aggregate, mapping to a specific
        metric name.
    :type reducer: ((float, float) -> float, (float, float, float) -> float)
    :param reducer: Combines the metric values together.
    :type suffix: str
    :param suffix: Suffix to append to the metric name in its combined form.
    """
    # Collect metrics separately, keyed by their dimensions
    separated_metrics = {}  # type: dict[frozenset, list[dict]]
    for el in all_metrics:
        key = frozenset(el["metric"]["dimensions"].items())
        if key not in separated_metrics:
            separated_metrics[key] = [el]
        else:
            separated_metrics[key].append(el)

    # Collect all dimensions
    dims = {}
    for metric_dims in separated_metrics.keys():
        for prop, val in six.iteritems(dict(metric_dims)):
            if prop in dims:
                dims[prop].add(val)
            else:
                # Wrap val in a list: set(val) would build a set of the
                # string's characters rather than a one-element set.
                dims[prop] = set([val])

    # Sort each metric by timestamp
    for _, metric in six.iteritems(separated_metrics):
        metric.sort(key=lambda v: v["metric"]["timestamp"])

    # Longest series first; its timestamps drive the aggregation
    separated_metrics = sorted(list(separated_metrics.values()), key=len)
    separated_metrics.reverse()

    # Compute the new values
    new_values = []
    all_timestamps = [[x["metric"]["timestamp"] for x in l]
                      for l in separated_metrics]
    metric_count = len(separated_metrics)
    for index in range(0, len(separated_metrics[0])):
        new_value = reducer[0](
            separated_metrics[0][index]["metric"]["value"],
            metric_count)
        new_timestamp = separated_metrics[0][index]["metric"]["timestamp"]
        for metric_index in range(1, metric_count):
            new_value = reducer[1](new_value, helpers.interpolate(
                new_timestamp,
                separated_metrics[metric_index],
                all_timestamps[metric_index]), metric_count)
        new_values.append((new_timestamp, new_value))

    # Aggregate the other details
    metric_name = separated_metrics[0][0]["metric"]["name"] + suffix
    meta = separated_metrics[0][0]["meta"]
    new_metrics = [
        helpers.create_agg_metric(metric_name, meta, dims, val[0], val[1])
        for val in new_values
    ]
    return new_metrics
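# --- Usage sketch (illustrative only) --------------------------------------
# aggregate() threads one value per provider through a pair of reducers:
# reducer[0] seeds the accumulator from the longest series' value, and
# reducer[1] folds in each interpolated value from the other providers.
# Both also receive the provider count, which makes an incremental average
# straightforward; the helper names below are hypothetical.
def _avg_seed(value, count):
    # Each provider contributes value / count to the final average.
    return value / float(count)


def _avg_fold(acc, value, count):
    return acc + value / float(count)

# aggregate(all_metrics, (_avg_seed, _avg_fold), "_avg") would emit one
# "<name>_avg" point per timestamp of the provider with the most samples.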