def aggregated(refs_and_timeseries, operations, from_timestamp=None,
               to_timestamp=None, needed_percent_of_overlap=100.0, fill=None):
    """Evaluate `operations` over several timeseries, bucketed by sampling.

    Each timeserie is grouped by its `sampling`; per bucket a
    (series x unique-timestamps) value grid is built, the overlap constraint
    is enforced, and `agg_operations.evaluate` is applied.

    Returns a dict mapping an identifier — "aggregated" when the operations
    collapsed the series, otherwise "<ref[0]>_<ref[1]>" per reference — to a
    list of (timestamp, granularity, value) triples.

    Raises exceptions.UnAggregableTimeseries when overlap is insufficient.
    """
    grouped_series = collections.defaultdict(list)
    grouped_refs = collections.defaultdict(list)
    for ref, ts in refs_and_timeseries:
        if from_timestamp is None:
            lower = None
        else:
            lower = carbonara.round_timestamp(from_timestamp, ts.sampling)
        grouped_refs[ts.sampling].append(ref)
        grouped_series[ts.sampling].append(ts[lower:to_timestamp])

    out = collections.defaultdict(
        lambda: {'timestamps': [], 'granularity': [], 'values': []})

    for sampling in sorted(grouped_series, reverse=True):
        bucket = grouped_series[sampling]
        merged = numpy.concatenate(bucket)
        # numpy.unique both deduplicates and sorts the timestamps for us.
        times, inverse = numpy.unique(merged['timestamps'],
                                      return_inverse=True)

        # Build a (series x unique-times) grid pre-filled with NaN (or the
        # caller-provided fill value) so missing datapoints are explicit.
        pad = numpy.NaN if fill in (None, 'null', 'dropna') else fill
        grid = numpy.full((len(bucket), len(times)), pad)
        offset = 0
        for row, chunk in enumerate(bucket):
            n = len(chunk)
            grid[row][inverse[offset:offset + n]] = chunk['values']
            offset += n
        values = grid.T

        if fill is None:
            # Indices of timestamps where every series has a real value.
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    grouped_refs[sampling], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        grouped_refs[sampling],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' % (needed_percent_of_overlap,
                                               percent_of_overlap))

        granularity, times, values, is_aggregated = agg_operations.evaluate(
            operations, sampling, times, values, False,
            grouped_refs[sampling])

        values = values.T
        idents = (["aggregated"] if is_aggregated
                  else ["%s_%s" % tuple(ref)
                        for ref in grouped_refs[sampling]])

        for row, ident in enumerate(idents):
            if fill == "dropna":
                keep = ~numpy.isnan(values[row])
                v = values[row][keep]
                t = times[keep]
            else:
                v = values[row]
                t = times
            out[ident]["timestamps"].extend(t)
            out[ident]['granularity'].extend([granularity] * len(t))
            out[ident]['values'].extend(v)

    return {ident: list(six.moves.zip(out[ident]['timestamps'],
                                      out[ident]['granularity'],
                                      out[ident]['values']))
            for ident in out}
def aggregated(refs_and_timeseries, operations, from_timestamp=None,
               to_timestamp=None, needed_percent_of_overlap=100.0, fill=None):
    """Evaluate `operations` over timeseries, bucketed by granularity.

    Timeseries are grouped by `timeserie.aggregation.granularity`; per bucket
    a (series x unique-timestamps) grid is built, the overlap constraint is
    enforced, and `agg_operations.evaluate` is applied.

    :param refs_and_timeseries: iterable of (reference, timeserie) pairs;
        each reference exposes `lookup_key`, `resource`, `metric`, `name`
        and `aggregation` attributes (project types — not shown here).
    :param fill: None (strict overlap), 'null'/'dropna' (NaN padding,
        'dropna' also filters NaN/inf from the output), or a numeric filler.

    Returns either {"aggregated": [(t, g, v), ...]} when the operations
    collapsed the series, or a nested mapping keyed by resource-id/metric
    (or metric name for resource-less references) and aggregation.

    Raises exceptions.UnAggregableTimeseries when overlap is insufficient.
    """
    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    lookup_keys = collections.defaultdict(list)
    for (ref, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else
                 carbonara.round_timestamp(
                     from_timestamp, timeserie.aggregation.granularity))
        references[timeserie.aggregation.granularity].append(ref)
        lookup_keys[timeserie.aggregation.granularity].append(ref.lookup_key)
        series[timeserie.aggregation.granularity].append(
            timeserie[from_:to_timestamp])

    # Predefine in case there is no series at all: the post-loop branch
    # below reads it unconditionally.  (A dead `result = []` that was
    # immediately overwritten has been removed.)
    is_aggregated = False
    result = {}
    for sampling in sorted(series, reverse=True):
        # np.unique sorts results for us
        times, indices = numpy.unique(
            numpy.concatenate([s['timestamps'] for s in series[sampling]]),
            return_inverse=True)

        # create nd-array (unique series x unique times) and fill;
        # numpy.nan is the permanent spelling (numpy.NaN was removed in
        # NumPy 2.0).
        filler = (numpy.nan if fill in [None, 'null', 'dropna'] else fill)
        val_grid = numpy.full((len(series[sampling]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[sampling]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            # Timestamps for which every series has a real value.
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    lookup_keys[sampling], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        lookup_keys[sampling],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' % (needed_percent_of_overlap,
                                               percent_of_overlap))

        granularity, times, values, is_aggregated = (agg_operations.evaluate(
            operations, sampling, times, values, False,
            lookup_keys[sampling]))

        values = values.T
        result[sampling] = (granularity, times, values, references[sampling])

    if is_aggregated:
        output = {"aggregated": []}
        for sampling in sorted(result, reverse=True):
            # `refs` (not `references`) to avoid shadowing the dict above.
            granularity, times, values, refs = result[sampling]
            if fill == "dropna":
                pos = ~numpy.logical_or(numpy.isnan(values[0]),
                                        numpy.isinf(values[0]))
                v = values[0][pos]
                t = times[pos]
            else:
                v = values[0]
                t = times
            g = [granularity] * len(t)
            output["aggregated"].extend(six.moves.zip(t, g, v))
        return output
    else:
        r_output = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(list)))
        m_output = collections.defaultdict(
            lambda: collections.defaultdict(list))
        for sampling in sorted(result, reverse=True):
            granularity, times, values, refs = result[sampling]
            for i, ref in enumerate(refs):
                if fill == "dropna":
                    pos = ~numpy.logical_or(numpy.isnan(values[i]),
                                            numpy.isinf(values[i]))
                    v = values[i][pos]
                    t = times[pos]
                else:
                    v = values[i]
                    t = times
                g = [granularity] * len(t)
                measures = six.moves.zip(t, g, v)
                if ref.resource is None:
                    m_output[ref.name][ref.aggregation].extend(measures)
                else:
                    r_output[str(ref.resource.id)][ref.metric.name][
                        ref.aggregation].extend(measures)
        return r_output if r_output else m_output
def aggregated(refs_and_timeseries, operations, from_timestamp=None,
               to_timestamp=None, needed_percent_of_overlap=100.0, fill=None):
    """Evaluate `operations` over timeseries, bucketed by granularity.

    Timeseries are grouped by `timeserie.aggregation.granularity`; per bucket
    a (series x unique-timestamps) grid is built, the overlap constraint is
    enforced, and `agg_operations.evaluate` is applied.

    :param refs_and_timeseries: iterable of (reference, timeserie) pairs;
        each reference exposes `lookup_key`, `resource`, `metric`, `name`
        and `aggregation` attributes (project types — not shown here).
    :param fill: None (strict overlap), 'null'/'dropna' (NaN padding,
        'dropna' also filters NaN from the output), or a numeric filler.
        NOTE(review): this variant drops only NaN, not infinities — confirm
        that is intended.

    Returns either {"aggregated": [(t, g, v), ...]} when the operations
    collapsed the series, or a nested mapping keyed by resource-id/metric
    (or metric name for resource-less references) and aggregation.

    Raises exceptions.UnAggregableTimeseries when overlap is insufficient.
    """
    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    lookup_keys = collections.defaultdict(list)
    for (ref, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else
                 carbonara.round_timestamp(
                     from_timestamp, timeserie.aggregation.granularity))
        references[timeserie.aggregation.granularity].append(ref)
        lookup_keys[timeserie.aggregation.granularity].append(ref.lookup_key)
        series[timeserie.aggregation.granularity].append(
            timeserie[from_:to_timestamp])

    # Predefine in case there is no series at all: the post-loop branch
    # below reads it unconditionally.  (A dead `result = []` that was
    # immediately overwritten has been removed.)
    is_aggregated = False
    result = {}
    for sampling in sorted(series, reverse=True):
        combine = numpy.concatenate(series[sampling])
        # np.unique sorts results for us
        times, indices = numpy.unique(combine['timestamps'],
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill;
        # numpy.nan is the permanent spelling (numpy.NaN was removed in
        # NumPy 2.0).
        filler = (numpy.nan if fill in [None, 'null', 'dropna'] else fill)
        val_grid = numpy.full((len(series[sampling]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[sampling]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            # Timestamps for which every series has a real value.
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    lookup_keys[sampling], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        lookup_keys[sampling],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' % (needed_percent_of_overlap,
                                               percent_of_overlap))

        granularity, times, values, is_aggregated = (
            agg_operations.evaluate(operations, sampling, times, values,
                                    False, lookup_keys[sampling]))

        values = values.T
        result[sampling] = (granularity, times, values, references[sampling])

    if is_aggregated:
        output = {"aggregated": []}
        for sampling in sorted(result, reverse=True):
            # `refs` (not `references`) to avoid shadowing the dict above.
            granularity, times, values, refs = result[sampling]
            if fill == "dropna":
                pos = ~numpy.isnan(values[0])
                v = values[0][pos]
                t = times[pos]
            else:
                v = values[0]
                t = times
            g = [granularity] * len(t)
            output["aggregated"].extend(six.moves.zip(t, g, v))
        return output
    else:
        r_output = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(list)))
        m_output = collections.defaultdict(
            lambda: collections.defaultdict(list))
        for sampling in sorted(result, reverse=True):
            granularity, times, values, refs = result[sampling]
            for i, ref in enumerate(refs):
                if fill == "dropna":
                    pos = ~numpy.isnan(values[i])
                    v = values[i][pos]
                    t = times[pos]
                else:
                    v = values[i]
                    t = times
                g = [granularity] * len(t)
                measures = six.moves.zip(t, g, v)
                if ref.resource is None:
                    m_output[ref.name][ref.aggregation].extend(measures)
                else:
                    r_output[str(ref.resource.id)][
                        ref.metric.name][ref.aggregation].extend(measures)
        return r_output if r_output else m_output