def test_aggregated_different_archive_overlap_edge_missing2(self):
    tsc1 = carbonara.AggregatedTimeSerie(sampling=60, max_size=10,
                                         aggregation_method='mean')
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
    tsc2 = carbonara.AggregatedTimeSerie(sampling=60, max_size=10,
                                         aggregation_method='mean')
    tsb2 = carbonara.BoundTimeSerie(block_size=tsc2.sampling)

    tsb1.set_values([
        (datetime.datetime(2014, 1, 1, 12, 3, 0), 4),
    ], before_truncate_callback=tsc1.update)
    tsb2.set_values([
        (datetime.datetime(2014, 1, 1, 11, 0, 0), 4),
        (datetime.datetime(2014, 1, 1, 12, 3, 0), 4),
    ], before_truncate_callback=tsc2.update)

    output = carbonara.AggregatedTimeSerie.aggregated(
        [tsc1, tsc2], aggregation='mean')
    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:03:00'), 60.0, 4.0),
    ], output)
def test_aggregated_different_archive_no_overlap2(self):
    tsc1 = carbonara.AggregatedTimeSerie(sampling=60, max_size=50,
                                         aggregation_method='mean')
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
    tsc2 = carbonara.AggregatedTimeSerie(sampling=60, max_size=50,
                                         aggregation_method='mean')

    tsb1.set_values([(datetime.datetime(2014, 1, 1, 12, 3, 0), 4)],
                    before_truncate_callback=tsc1.update)

    self.assertRaises(carbonara.UnAggregableTimeseries,
                      carbonara.AggregatedTimeSerie.aggregated,
                      [tsc1, tsc2], aggregation='mean')
def test_fetch_nano(self):
    ts = carbonara.AggregatedTimeSerie(sampling=0.2, max_size=10,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 11, 46, 0, 200123), 4),
        (datetime.datetime(2014, 1, 1, 11, 46, 0, 340000), 8),
        (datetime.datetime(2014, 1, 1, 11, 47, 0, 323154), 50),
        (datetime.datetime(2014, 1, 1, 11, 48, 0, 590903), 4),
        (datetime.datetime(2014, 1, 1, 11, 48, 0, 903291), 4),
    ], before_truncate_callback=ts.update)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 11, 48, 0, 821312), 5),
    ], before_truncate_callback=ts.update)

    self.assertEqual(
        [(datetime.datetime(2014, 1, 1, 11, 46, 0, 200000), 0.2, 6.0),
         (datetime.datetime(2014, 1, 1, 11, 47, 0, 200000), 0.2, 50.0),
         (datetime.datetime(2014, 1, 1, 11, 48, 0, 400000), 0.2, 4.0),
         (datetime.datetime(2014, 1, 1, 11, 48, 0, 800000), 0.2, 4.5)],
        ts.fetch())
def test_fetch_agg_max(self):
    ts = carbonara.AggregatedTimeSerie(sampling=60, max_size=60,
                                       aggregation_method='max')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                    (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
                    (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
                    (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
                    (datetime.datetime(2014, 1, 1, 12, 2, 12), 1)],
                   before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:00'), 60.0, 3),
        (pandas.Timestamp('2014-01-01 12:01:00'), 60.0, 7),
        (pandas.Timestamp('2014-01-01 12:02:00'), 60.0, 15),
    ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 2, 13), 110)],
                   before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:00'), 60.0, 3),
        (pandas.Timestamp('2014-01-01 12:01:00'), 60.0, 7),
        (pandas.Timestamp('2014-01-01 12:02:00'), 60.0, 110),
    ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
def test_fetch_agg_std(self):
    ts = carbonara.AggregatedTimeSerie(sampling=60, max_size=60,
                                       aggregation_method='std')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                    (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
                    (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
                    (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
                    (datetime.datetime(2014, 1, 1, 12, 2, 12), 1)],
                   before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:01:00'), 60.0, 2.1213203435596424),
        (pandas.Timestamp('2014-01-01 12:02:00'), 60.0, 9.8994949366116654),
    ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 2, 13), 110)],
                   before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:01:00'), 60.0, 2.1213203435596424),
        (pandas.Timestamp('2014-01-01 12:02:00'), 60.0, 59.304300012730948),
    ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
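# Hedged note (not part of the original test): the expected values above are
# consistent with the sample standard deviation (ddof=1). For the 12:01
# bucket, std([4, 7]) = sqrt(4.5) ≈ 2.1213; for the 12:02 bucket,
# std([15, 1]) = sqrt(98) ≈ 9.8995, and once the extra point arrives,
# std([15, 1, 110]) = sqrt(3517) ≈ 59.3043.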
def test_74_percentile_serialized(self):
    ts = carbonara.AggregatedTimeSerie(sampling='1Min',
                                       aggregation_method='74pct')
    ts.update(carbonara.TimeSerie.from_tuples([
        (datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
        (datetime.datetime(2014, 1, 1, 12, 0, 4), 5),
        (datetime.datetime(2014, 1, 1, 12, 0, 9), 6)]))

    self.assertEqual(1, len(ts))
    self.assertEqual(5.48, ts[datetime.datetime(2014, 1, 1, 12, 0, 0)])

    # Serialize and unserialize
    ts = carbonara.AggregatedTimeSerie.unserialize(ts.serialize())

    ts.update(carbonara.TimeSerie.from_tuples([
        (datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
        (datetime.datetime(2014, 1, 1, 12, 0, 4), 5),
        (datetime.datetime(2014, 1, 1, 12, 0, 9), 6)]))

    self.assertEqual(1, len(ts))
    self.assertEqual(5.48, ts[datetime.datetime(2014, 1, 1, 12, 0, 0)])
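# Hedged aside (not part of the original test suite): the expected 5.48 above
# is consistent with a linearly-interpolated 74th percentile of [3, 5, 6]:
# rank = 0.74 * (3 - 1) = 1.48, hence 5 + 0.48 * (6 - 5) = 5.48. A minimal
# sketch of that computation, assuming numpy's default linear interpolation
# matches what the 'NNpct' aggregation methods do internally:
def _percentile_sketch(values, pct):
    import numpy
    # numpy.percentile defaults to linear interpolation between ranks.
    return numpy.percentile(values, pct)
# _percentile_sketch([3, 5, 6], 74) -> ~5.48 (up to float rounding)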
def _get_splits_and_unserialize(self, metrics_aggregations_keys):
    """Get splits and unserialize them

    :param metrics_aggregations_keys: A dict where keys are
                                      `storage.Metric` and values are dict
                                      of {Aggregation: [SplitKey]} to
                                      retrieve.
    :return: A dict where keys are `storage.Metric` and values are dict
             {aggregation: [`carbonara.AggregatedTimeSerie`]}.
    """
    raw_measures = self._get_splits(metrics_aggregations_keys)
    results = collections.defaultdict(
        lambda: collections.defaultdict(list))
    for metric, aggregations_and_raws in six.iteritems(raw_measures):
        for aggregation, raws in six.iteritems(aggregations_and_raws):
            for key, raw in six.moves.zip(
                    metrics_aggregations_keys[metric][aggregation],
                    raws):
                try:
                    ts = carbonara.AggregatedTimeSerie.unserialize(
                        raw, key, aggregation)
                except carbonara.InvalidData:
                    LOG.error("Data corruption detected for %s "
                              "aggregated `%s' timeserie, granularity "
                              "`%s' around time `%s', ignoring.",
                              metric.id, aggregation.method,
                              key.sampling, key)
                    ts = carbonara.AggregatedTimeSerie(aggregation)
                results[metric][aggregation].append(ts)
    return results
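# Hedged illustration (placeholder names, not part of the driver code): for a
# single metric, aggregation and split key, the argument and return value of
# _get_splits_and_unserialize would look roughly like:
#
#     metrics_aggregations_keys = {metric: {aggregation: [split_key]}}
#     results = {metric: {aggregation: [<carbonara.AggregatedTimeSerie>]}}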
def _get_splits_and_unserialize(self, metric, keys_and_aggregations):
    """Get splits and unserialize them

    :param metric: The metric to retrieve.
    :param keys_and_aggregations: A list of tuple (SplitKey, Aggregation)
                                  to retrieve.
    :return: A list of AggregatedTimeSerie.
    """
    if not keys_and_aggregations:
        return []
    raw_measures = self._get_measures(metric, keys_and_aggregations)
    results = []
    for (key, aggregation), raw in six.moves.zip(
            keys_and_aggregations, raw_measures):
        try:
            ts = carbonara.AggregatedTimeSerie.unserialize(
                raw, key, aggregation)
        except carbonara.InvalidData:
            LOG.error("Data corruption detected for %s "
                      "aggregated `%s' timeserie, granularity `%s' "
                      "around time `%s', ignoring.",
                      metric.id, aggregation.method, key.sampling, key)
            ts = carbonara.AggregatedTimeSerie(aggregation)
        results.append(ts)
    return results
def test_aggregated_different_archive_overlap_edge_missing1(self):
    tsc1 = carbonara.AggregatedTimeSerie(sampling=60, max_size=10,
                                         aggregation_method='mean')
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
    tsc2 = carbonara.AggregatedTimeSerie(sampling=60, max_size=10,
                                         aggregation_method='mean')
    tsb2 = carbonara.BoundTimeSerie(block_size=tsc2.sampling)

    tsb1.set_values([
        (datetime.datetime(2014, 1, 1, 12, 3, 0), 9),
        (datetime.datetime(2014, 1, 1, 12, 4, 0), 1),
        (datetime.datetime(2014, 1, 1, 12, 5, 0), 2),
        (datetime.datetime(2014, 1, 1, 12, 6, 0), 7),
        (datetime.datetime(2014, 1, 1, 12, 7, 0), 5),
        (datetime.datetime(2014, 1, 1, 12, 8, 0), 3),
    ], before_truncate_callback=tsc1.update)
    tsb2.set_values([
        (datetime.datetime(2014, 1, 1, 11, 0, 0), 6),
        (datetime.datetime(2014, 1, 1, 12, 1, 0), 2),
        (datetime.datetime(2014, 1, 1, 12, 2, 0), 13),
        (datetime.datetime(2014, 1, 1, 12, 3, 0), 24),
        (datetime.datetime(2014, 1, 1, 12, 4, 0), 4),
        (datetime.datetime(2014, 1, 1, 12, 5, 0), 16),
        (datetime.datetime(2014, 1, 1, 12, 6, 0), 12),
    ], before_truncate_callback=tsc2.update)

    # By default we require 100% of the points to overlap, but we allow
    # the last datapoint of the finest granularity to be missing.
    output = carbonara.AggregatedTimeSerie.aggregated(
        [tsc1, tsc2], aggregation='sum')
    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:03:00'), 60.0, 33.0),
        (pandas.Timestamp('2014-01-01 12:04:00'), 60.0, 5.0),
        (pandas.Timestamp('2014-01-01 12:05:00'), 60.0, 18.0),
        (pandas.Timestamp('2014-01-01 12:06:00'), 60.0, 19.0),
    ], output)
def _get_measures_timeserie(storage, ref, granularity, *args, **kwargs):
    agg = ref.metric.archive_policy.get_aggregation(
        ref.aggregation, granularity)
    try:
        data = storage.get_aggregated_measures(
            {ref.metric: [agg]}, *args, **kwargs)[ref.metric][agg]
    except gnocchi_storage.MetricDoesNotExist:
        data = carbonara.AggregatedTimeSerie(
            carbonara.Aggregation(ref.aggregation, granularity, None))
    return (ref, data)
def test_down_sampling(self):
    ts = carbonara.AggregatedTimeSerie(sampling='5Min',
                                       aggregation_method='mean')
    ts.update(carbonara.TimeSerie.from_data(
        [datetime.datetime(2014, 1, 1, 12, 0, 0),
         datetime.datetime(2014, 1, 1, 12, 0, 4),
         datetime.datetime(2014, 1, 1, 12, 0, 9)],
        [3, 5, 7]))
    self.assertEqual(1, len(ts))
    self.assertEqual(5, ts[datetime.datetime(2014, 1, 1, 12, 0, 0)])
def test_before_epoch(self):
    ts = carbonara.AggregatedTimeSerie(sampling='1Min',
                                       aggregation_method='74pct')
    self.assertRaises(carbonara.BeforeEpochError,
                      ts.update,
                      carbonara.TimeSerie.from_tuples([
                          (datetime.datetime(1950, 1, 1, 12, 0, 0), 3),
                          (datetime.datetime(2014, 1, 1, 12, 0, 4), 5),
                          (datetime.datetime(2014, 1, 1, 12, 0, 9), 6)]))
def _get_measures_timeserie(self, metric, aggregation, granularity,
                            from_timestamp=None, to_timestamp=None):
    # Find the number of points
    for d in metric.archive_policy.definition:
        if d.granularity == granularity:
            points = d.points
            break
    else:
        raise storage.GranularityDoesNotExist(metric, granularity)

    all_keys = None
    try:
        all_keys = self._list_split_keys_for_metric(
            metric, aggregation, granularity)
    except storage.MetricDoesNotExist:
        for d in metric.archive_policy.definition:
            if d.granularity == granularity:
                return carbonara.AggregatedTimeSerie(
                    sampling=granularity,
                    aggregation_method=aggregation,
                    max_size=d.points)
        raise storage.GranularityDoesNotExist(metric, granularity)

    if from_timestamp:
        from_timestamp = str(
            carbonara.SplitKey.from_timestamp_and_sampling(
                from_timestamp, granularity))

    if to_timestamp:
        to_timestamp = str(
            carbonara.SplitKey.from_timestamp_and_sampling(
                to_timestamp, granularity))

    timeseries = list(filter(
        lambda x: x is not None,
        self._map_in_thread(
            self._get_measures_and_unserialize,
            ((metric, key, aggregation, granularity)
             for key in sorted(all_keys)
             if ((not from_timestamp or key >= from_timestamp)
                 and (not to_timestamp or key <= to_timestamp))))))

    return carbonara.AggregatedTimeSerie.from_timeseries(
        sampling=granularity,
        aggregation_method=aggregation,
        timeseries=timeseries,
        max_size=points)
def test_95_percentile(self):
    ts = carbonara.AggregatedTimeSerie(sampling='1Min',
                                       aggregation_method='95pct')
    ts.update(carbonara.TimeSerie.from_tuples([
        (datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
        (datetime.datetime(2014, 1, 1, 12, 0, 4), 5),
        (datetime.datetime(2014, 1, 1, 12, 0, 9), 6)]))

    self.assertEqual(1, len(ts))
    self.assertEqual(5.9000000000000004,
                     ts[datetime.datetime(2014, 1, 1, 12, 0, 0)])
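# Hedged note: the expected 5.9000000000000004 matches the same linear
# interpolation as in the 74pct test above, here at the 95th percentile of
# [3, 5, 6]: rank = 0.95 * (3 - 1) = 1.9, hence 5 + 0.9 * (6 - 5) = 5.9
# (modulo float rounding).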
def test_no_truncation(self):
    ts = carbonara.AggregatedTimeSerie(sampling=60,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie()

    for i in six.moves.range(1, 11):
        tsb.set_values(
            [(datetime.datetime(2014, 1, 1, 12, i, i), float(i))],
            before_truncate_callback=ts.update)
        tsb.set_values(
            [(datetime.datetime(2014, 1, 1, 12, i, i + 1), float(i + 1))],
            before_truncate_callback=ts.update)
        self.assertEqual(i, len(ts.fetch()))
def test_max_size(self):
    ts = carbonara.AggregatedTimeSerie(sampling=1, max_size=2,
                                       aggregation_method='mean')
    ts.update(carbonara.TimeSerie.from_data(
        [datetime.datetime(2014, 1, 1, 12, 0, 0),
         datetime.datetime(2014, 1, 1, 12, 0, 4),
         datetime.datetime(2014, 1, 1, 12, 0, 9)],
        [3, 5, 6]))
    self.assertEqual(2, len(ts))
    self.assertEqual(5, ts[0])
    self.assertEqual(6, ts[1])
def test_to_dict_from_dict(self):
    ts = carbonara.AggregatedTimeSerie(sampling='1Min', max_size=2,
                                       aggregation_method='max')
    ts.update(carbonara.TimeSerie.from_data(
        [datetime.datetime(2014, 1, 1, 12, 0, 0),
         datetime.datetime(2014, 1, 1, 12, 1, 4),
         datetime.datetime(2014, 1, 1, 12, 1, 9),
         datetime.datetime(2014, 1, 1, 12, 2, 12)],
        [3, 5, 7, 1]))
    ts2 = carbonara.AggregatedTimeSerie.from_dict(ts.to_dict())
    self.assertEqual(ts, ts2)
def test_fetch(self):
    ts = carbonara.AggregatedTimeSerie(sampling=60, max_size=10,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 11, 46, 4), 4),
        (datetime.datetime(2014, 1, 1, 11, 47, 34), 8),
        (datetime.datetime(2014, 1, 1, 11, 50, 54), 50),
        (datetime.datetime(2014, 1, 1, 11, 54, 45), 4),
        (datetime.datetime(2014, 1, 1, 11, 56, 49), 4),
        (datetime.datetime(2014, 1, 1, 11, 57, 22), 6),
        (datetime.datetime(2014, 1, 1, 11, 58, 22), 5),
        (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
        (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
        (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
        (datetime.datetime(2014, 1, 1, 12, 2, 12), 1),
        (datetime.datetime(2014, 1, 1, 12, 3, 0), 3),
        (datetime.datetime(2014, 1, 1, 12, 4, 9), 7),
        (datetime.datetime(2014, 1, 1, 12, 5, 1), 15),
        (datetime.datetime(2014, 1, 1, 12, 5, 12), 1),
        (datetime.datetime(2014, 1, 1, 12, 6, 0, 2), 3),
    ], before_truncate_callback=ts.update)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 12, 6), 5),
    ], before_truncate_callback=ts.update)

    self.assertEqual([(datetime.datetime(2014, 1, 1, 11, 54), 60.0, 4.0),
                      (datetime.datetime(2014, 1, 1, 11, 56), 60.0, 4.0),
                      (datetime.datetime(2014, 1, 1, 11, 57), 60.0, 6.0),
                      (datetime.datetime(2014, 1, 1, 11, 58), 60.0, 5.0),
                      (datetime.datetime(2014, 1, 1, 12, 1), 60.0, 5.5),
                      (datetime.datetime(2014, 1, 1, 12, 2), 60.0, 8.0),
                      (datetime.datetime(2014, 1, 1, 12, 3), 60.0, 3.0),
                      (datetime.datetime(2014, 1, 1, 12, 4), 60.0, 7.0),
                      (datetime.datetime(2014, 1, 1, 12, 5), 60.0, 8.0),
                      (datetime.datetime(2014, 1, 1, 12, 6), 60.0, 4.0)],
                     ts.fetch())

    self.assertEqual([(datetime.datetime(2014, 1, 1, 12, 1), 60.0, 5.5),
                      (datetime.datetime(2014, 1, 1, 12, 2), 60.0, 8.0),
                      (datetime.datetime(2014, 1, 1, 12, 3), 60.0, 3.0),
                      (datetime.datetime(2014, 1, 1, 12, 4), 60.0, 7.0),
                      (datetime.datetime(2014, 1, 1, 12, 5), 60.0, 8.0),
                      (datetime.datetime(2014, 1, 1, 12, 6), 60.0, 4.0)],
                     ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
def test_back_window_ignore(self):
    """Back window testing.

    Test that the back window of an archive is not longer than the
    window we aggregate on.
    """
    ts = carbonara.AggregatedTimeSerie(sampling=1, max_size=60,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 12, 0, 1, 2300), 1),
        (datetime.datetime(2014, 1, 1, 12, 0, 1, 4600), 2),
        (datetime.datetime(2014, 1, 1, 12, 0, 2, 4500), 3),
        (datetime.datetime(2014, 1, 1, 12, 0, 2, 7800), 4),
        (datetime.datetime(2014, 1, 1, 12, 0, 3, 8), 2.5),
    ], before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
        (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
        (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
    ], ts.fetch())

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9),
    ], ignore_too_old_timestamps=True,
        before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
        (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
        (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
    ], ts.fetch())

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9),
        (datetime.datetime(2014, 1, 1, 12, 0, 3, 9), 4.5),
    ], ignore_too_old_timestamps=True,
        before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
        (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
        (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 3.5),
    ], ts.fetch())
def test_down_sampling_with_max_size_and_method_max(self):
    ts = carbonara.AggregatedTimeSerie(sampling='1Min', max_size=2,
                                       aggregation_method='max')
    ts.update(carbonara.TimeSerie.from_data(
        [datetime.datetime(2014, 1, 1, 12, 0, 0),
         datetime.datetime(2014, 1, 1, 12, 1, 4),
         datetime.datetime(2014, 1, 1, 12, 1, 9),
         datetime.datetime(2014, 1, 1, 12, 2, 12)],
        [3, 5, 70, 1]))
    self.assertEqual(2, len(ts))
    self.assertEqual(70, ts[datetime.datetime(2014, 1, 1, 12, 1, 0)])
    self.assertEqual(1, ts[datetime.datetime(2014, 1, 1, 12, 2, 0)])
def test_serialize(self):
    ts = carbonara.AggregatedTimeSerie(sampling=0.5,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 12, 0, 0, 1234), 3),
        (datetime.datetime(2014, 1, 1, 12, 0, 0, 321), 6),
        (datetime.datetime(2014, 1, 1, 12, 1, 4, 234), 5),
        (datetime.datetime(2014, 1, 1, 12, 1, 9, 32), 7),
        (datetime.datetime(2014, 1, 1, 12, 2, 12, 532), 1),
    ], before_truncate_callback=ts.update)

    self.assertEqual(
        ts, carbonara.AggregatedTimeSerie.unserialize(ts.serialize()))
def _get_measures_timeserie(self, metric, aggregation,
                            from_timestamp=None, to_timestamp=None):
    try:
        all_keys = self._list_split_keys_for_metric(
            metric, aggregation.method, aggregation.granularity)
    except MetricDoesNotExist:
        return carbonara.AggregatedTimeSerie(
            sampling=aggregation.granularity,
            aggregation_method=aggregation.method)

    if from_timestamp:
        from_timestamp = carbonara.SplitKey.from_timestamp_and_sampling(
            from_timestamp, aggregation.granularity)

    if to_timestamp:
        to_timestamp = carbonara.SplitKey.from_timestamp_and_sampling(
            to_timestamp, aggregation.granularity)

    keys = [
        key for key in sorted(all_keys)
        if ((not from_timestamp or key >= from_timestamp)
            and (not to_timestamp or key <= to_timestamp))
    ]

    timeseries = self._get_measures_and_unserialize(
        metric, keys, aggregation.method)

    ts = carbonara.AggregatedTimeSerie.from_timeseries(
        sampling=aggregation.granularity,
        aggregation_method=aggregation.method,
        timeseries=timeseries)
    # We need to truncate because:
    # - If the driver is not in WRITE_FULL mode, it might read too much
    #   data that will be deleted once the split is rewritten. Just
    #   truncate so we don't return it.
    # - If the driver is in WRITE_FULL mode but the archive policy has
    #   been resized, we might still have too many points stored; they
    #   will be deleted later, when new points are processed. Truncate to
    #   be sure we don't return them.
    if aggregation.timespan is not None:
        ts.truncate(aggregation.timespan)
    return ts
def test_aggregated_different_archive_no_overlap2(self):
    tsc1 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 50, 'agg': 'mean'}
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
    tsc2 = carbonara.AggregatedTimeSerie(
        sampling=numpy.timedelta64(60, 's'),
        max_size=50, aggregation_method='mean')

    tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 3, 0), 4)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc1))

    self.assertRaises(cross_metric.UnAggregableTimeseries,
                      cross_metric.aggregated,
                      [tsc1['return'], tsc2], aggregation='mean')
def test_from_timeseries(self):
    sampling = 5
    points = 100000
    ts = carbonara.TimeSerie.from_data(
        timestamps=map(datetime.datetime.utcfromtimestamp,
                       six.moves.range(points)),
        values=six.moves.range(points))
    agg = carbonara.AggregatedTimeSerie(sampling=sampling,
                                        aggregation_method='mean')
    agg.update(ts)

    split = [t[1] for t in list(agg.split())]

    self.assertEqual(agg,
                     carbonara.AggregatedTimeSerie.from_timeseries(
                         split,
                         sampling=agg.sampling,
                         max_size=agg.max_size,
                         aggregation_method=agg.aggregation_method))
def test_split(self):
    sampling = 5
    points = 100000
    ts = carbonara.TimeSerie.from_data(
        timestamps=map(datetime.datetime.utcfromtimestamp,
                       six.moves.range(points)),
        values=six.moves.range(points))
    agg = carbonara.AggregatedTimeSerie(sampling=sampling,
                                        aggregation_method='mean')
    agg.update(ts)

    grouped_points = list(agg.split())

    self.assertEqual(
        math.ceil((points / float(sampling))
                  / carbonara.AggregatedTimeSerie.POINTS_PER_SPLIT),
        len(grouped_points))
    self.assertEqual("0.0", grouped_points[0][0])
    # 14400 × 5s = 20 hours
    self.assertEqual("72000.0", grouped_points[1][0])
    self.assertEqual(carbonara.AggregatedTimeSerie.POINTS_PER_SPLIT,
                     len(grouped_points[0][1]))
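# Hedged note on the arithmetic asserted above: 100000 one-second points
# resampled at a 5-second granularity yield 20000 aggregated points, and with
# POINTS_PER_SPLIT presumably equal to 14400 (as the "14400 × 5s = 20 hours"
# comment suggests) that gives ceil(20000 / 14400) = 2 splits, the second one
# starting 14400 * 5 = 72000 seconds after the epoch, hence the key "72000.0".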
def test_back_window(self):
    """Back window testing.

    Test that the back window of an archive is not longer than the
    window we aggregate on.
    """
    ts = carbonara.AggregatedTimeSerie(sampling=1, max_size=60,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 12, 0, 1, 2300), 1),
        (datetime.datetime(2014, 1, 1, 12, 0, 1, 4600), 2),
        (datetime.datetime(2014, 1, 1, 12, 0, 2, 4500), 3),
        (datetime.datetime(2014, 1, 1, 12, 0, 2, 7800), 4),
        (datetime.datetime(2014, 1, 1, 12, 0, 3, 8), 2.5),
    ], before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
        (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
        (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
    ], ts.fetch())

    try:
        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9),
        ])
    except carbonara.NoDeloreanAvailable as e:
        self.assertEqual(
            six.text_type(e),
            u"2014-01-01 12:00:02.000099 is before 2014-01-01 12:00:03")
        self.assertEqual(datetime.datetime(2014, 1, 1, 12, 0, 2, 99),
                         e.bad_timestamp)
        self.assertEqual(datetime.datetime(2014, 1, 1, 12, 0, 3),
                         e.first_timestamp)
    else:
        self.fail("No exception raised")
def test_fetch_agg_pct(self):
    ts = carbonara.AggregatedTimeSerie(sampling=1, max_size=3600 * 24,
                                       aggregation_method='90pct')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                    (datetime.datetime(2014, 1, 1, 12, 0, 0, 123), 4),
                    (datetime.datetime(2014, 1, 1, 12, 0, 2), 4)],
                   before_truncate_callback=ts.update)

    result = ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0))
    reference = [(pandas.Timestamp('2014-01-01 12:00:00'), 1.0, 3.9),
                 (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 4)]

    self.assertEqual(len(reference), len(result))

    for ref, res in zip(reference, result):
        self.assertEqual(ref[0], res[0])
        self.assertEqual(ref[1], res[1])
        # Rounding \o/
        self.assertAlmostEqual(ref[2], res[2])

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 2, 113), 110)],
                   before_truncate_callback=ts.update)

    result = ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0))
    reference = [(pandas.Timestamp('2014-01-01 12:00:00'), 1.0, 3.9),
                 (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 99.4)]

    self.assertEqual(len(reference), len(result))

    for ref, res in zip(reference, result):
        self.assertEqual(ref[0], res[0])
        self.assertEqual(ref[1], res[1])
        # Rounding \o/
        self.assertAlmostEqual(ref[2], res[2])
def _compute_split_operations(self, metric, aggregations_and_timeseries,
                              previous_oldest_mutable_timestamp,
                              oldest_mutable_timestamp):
    """Compute changes to a metric and return operations to be done.

    Based on an aggregations list and a grouped timeseries, this computes
    what needs to be deleted and stored for a metric and returns it.

    :param metric: The metric
    :param aggregations_and_timeseries: A dictionary of timeseries of the
                                        form {aggregation: timeseries}.
    :param previous_oldest_mutable_timestamp: The previous oldest storable
                                              timestamp from the previous
                                              backwindow.
    :param oldest_mutable_timestamp: The current oldest storable timestamp
                                     from the current backwindow.
    :return: A tuple (keys_to_delete, keys_to_store) where keys_to_delete
             is a set of `carbonara.SplitKey` to delete and where
             keys_to_store is a dictionary of the form {key: aggts} where
             key is a `carbonara.SplitKey` and aggts a
             `carbonara.AggregatedTimeSerie` to be serialized.
    """
    # We only need to check for rewrite if driver is not in WRITE_FULL
    # mode and if we already stored splits once
    need_rewrite = (not self.WRITE_FULL
                    and previous_oldest_mutable_timestamp is not None)

    aggregations_needing_list_of_keys = set()
    oldest_values = {}

    for aggregation, ts in six.iteritems(aggregations_and_timeseries):
        # Don't do anything if the timeseries is empty
        if not ts:
            continue

        agg_oldest_values = {
            'oldest_point_to_keep': ts.truncate(aggregation.timespan)
            if aggregation.timespan else None,
            'prev_oldest_mutable_key': None,
            'oldest_mutable_key': None,
        }

        if previous_oldest_mutable_timestamp and (aggregation.timespan
                                                  or need_rewrite):
            previous_oldest_mutable_key = ts.get_split_key(
                previous_oldest_mutable_timestamp)
            oldest_mutable_key = ts.get_split_key(
                oldest_mutable_timestamp)

            # Only clean up if there is a new object, as there must be a
            # new object for an old object to be cleaned up
            if previous_oldest_mutable_key != oldest_mutable_key:
                aggregations_needing_list_of_keys.add(aggregation)
                agg_oldest_values['prev_oldest_mutable_key'] = (
                    previous_oldest_mutable_key)
                agg_oldest_values['oldest_mutable_key'] = (
                    oldest_mutable_key)

        oldest_values[aggregation.granularity] = agg_oldest_values

    all_existing_keys = self._list_split_keys(
        {metric: aggregations_needing_list_of_keys})[metric]

    # NOTE(jd) This dict uses (key, aggregation) tuples as keys because
    # using just (key) would not carry the aggregation method and
    # therefore would not be unique per aggregation!
    keys_and_split_to_store = {}
    deleted_keys = set()

    for aggregation, ts in six.iteritems(aggregations_and_timeseries):
        # Don't do anything if the timeseries is empty
        if not ts:
            continue

        agg_oldest_values = oldest_values[aggregation.granularity]

        oldest_key_to_keep = ts.get_split_key(
            agg_oldest_values['oldest_point_to_keep'])

        # If we listed the keys for the aggregation, that's because we
        # need to check for cleanup and/or rewrite
        if aggregation in all_existing_keys:
            # FIXME(jd) This should be sorted by the driver and asserted
            # it is in tests. It's likely backends already sort anyway.
            existing_keys = sorted(all_existing_keys[aggregation])
            # First, check for old splits to delete
            if aggregation.timespan:
                for key in list(existing_keys):
                    # NOTE(jd) Only delete if the key is strictly inferior
                    # to the timestamp; we don't delete any timeserie
                    # split that contains our timestamp, so we prefer to
                    # keep a bit more rather than delete too much
                    if key >= oldest_key_to_keep:
                        break
                    deleted_keys.add((key, aggregation))
                    existing_keys.remove(key)

            # Rewrite all read-only splits just for fun (and compression).
            # This only happens if `previous_oldest_mutable_timestamp'
            # exists, which means we already wrote some splits at some
            # point – so this is not the first time we treat this
            # timeserie.
            if need_rewrite:
                for key in existing_keys:
                    if agg_oldest_values['prev_oldest_mutable_key'] <= key:
                        if key >= agg_oldest_values['oldest_mutable_key']:
                            break
                        LOG.debug(
                            "Compressing previous split %s (%s) for "
                            "metric %s", key, aggregation.method, metric)
                        # NOTE(jd) Rewrite it entirely for fun (and later
                        # for compression). For that, we just pass an
                        # empty split.
                        keys_and_split_to_store[(key, aggregation)] = (
                            carbonara.AggregatedTimeSerie(aggregation))

        for key, split in ts.split():
            if key >= oldest_key_to_keep:
                LOG.debug("Storing split %s (%s) for metric %s",
                          key, aggregation.method, metric)
                keys_and_split_to_store[(key, aggregation)] = split

    return (deleted_keys, keys_and_split_to_store)
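# Hedged usage note: judging from the surrounding driver code, a caller is
# expected to feed the returned tuple into the delete/store primitives, e.g.
# (sketch only, not the actual call sites):
#
#     keys_to_delete, keys_to_store = self._compute_split_operations(...)
#     self._delete_metric_splits(metric, keys_to_delete)
#     self._store_timeserie_splits(metric, keys_to_store,
#                                  oldest_mutable_timestamp)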
def _get_measures_timeserie(self, metric, aggregation, granularity,
                            from_timestamp=None, to_timestamp=None):
    # Find the number of points
    for d in metric.archive_policy.definition:
        if d.granularity == granularity:
            points = d.points
            break
    else:
        raise storage.GranularityDoesNotExist(metric, granularity)

    all_keys = None
    try:
        all_keys = self._list_split_keys_for_metric(
            metric, aggregation, granularity)
    except storage.MetricDoesNotExist:
        # This can happen if it's an old metric with a TimeSerieArchive
        all_keys = None

    if not all_keys:
        # It does not mean we have no data: it can be an old metric with
        # a TimeSerieArchive.
        try:
            data = self._get_metric_archive(metric, aggregation)
        except (storage.MetricDoesNotExist,
                storage.AggregationDoesNotExist):
            # It really does not exist
            for d in metric.archive_policy.definition:
                if d.granularity == granularity:
                    return carbonara.AggregatedTimeSerie(
                        sampling=granularity,
                        aggregation_method=aggregation,
                        max_size=d.points)
            raise storage.GranularityDoesNotExist(metric, granularity)
        else:
            archive = carbonara.TimeSerieArchive.unserialize(data)
            # It's an old metric with a TimeSerieArchive!
            for ts in archive.agg_timeseries:
                if ts.sampling == granularity:
                    return ts
            raise storage.GranularityDoesNotExist(metric, granularity)

    if from_timestamp:
        from_timestamp = carbonara.AggregatedTimeSerie.get_split_key(
            from_timestamp, granularity)

    if to_timestamp:
        to_timestamp = carbonara.AggregatedTimeSerie.get_split_key(
            to_timestamp, granularity)

    timeseries = filter(
        lambda x: x is not None,
        self._map_in_thread(
            self._get_measures_and_unserialize,
            ((metric, key, aggregation, granularity)
             for key in all_keys
             if ((not from_timestamp or key >= from_timestamp)
                 and (not to_timestamp or key <= to_timestamp)))))

    return carbonara.AggregatedTimeSerie.from_timeseries(
        sampling=granularity,
        aggregation_method=aggregation,
        timeseries=timeseries,
        max_size=points)
def _add_measures(self, metric, aggregations, grouped_serie,
                  previous_oldest_mutable_timestamp,
                  oldest_mutable_timestamp):
    # We only need to check for rewrite if driver is not in WRITE_FULL
    # mode and if we already stored splits once
    need_rewrite = (not self.WRITE_FULL
                    and previous_oldest_mutable_timestamp is not None)

    timeseries = {}
    aggregations_needing_list_of_keys = set()

    for aggregation in aggregations:
        ts = carbonara.AggregatedTimeSerie.from_grouped_serie(
            grouped_serie, aggregation)

        # Don't do anything if the timeserie is empty
        if not ts:
            continue

        # Otherwise, store it for the next iteration
        timeseries[aggregation] = ts

        if aggregation.timespan:
            oldest_point_to_keep = ts.truncate(aggregation.timespan)
        else:
            oldest_point_to_keep = None

        if previous_oldest_mutable_timestamp and (aggregation.timespan
                                                  or need_rewrite):
            previous_oldest_mutable_key = ts.get_split_key(
                previous_oldest_mutable_timestamp)
            oldest_mutable_key = ts.get_split_key(
                oldest_mutable_timestamp)

            # Only clean up if there is a new object, as there must be a
            # new object for an old object to be cleaned up
            if previous_oldest_mutable_key != oldest_mutable_key:
                aggregations_needing_list_of_keys.add(aggregation)

    all_existing_keys = self._list_split_keys(
        metric, aggregations_needing_list_of_keys)

    # NOTE(jd) This dict uses (key, aggregation) tuples as keys because
    # using just (key) would not carry the aggregation method and
    # therefore would not be unique per aggregation!
    keys_and_split_to_store = {}
    deleted_keys = set()

    for aggregation, ts in six.iteritems(timeseries):
        oldest_key_to_keep = ts.get_split_key(oldest_point_to_keep)

        # If we listed the keys for the aggregation, that's because we
        # need to check for cleanup and/or rewrite
        if aggregation in all_existing_keys:
            # FIXME(jd) This should be sorted by the driver and asserted
            # it is in tests. It's likely backends already sort anyway.
            existing_keys = sorted(all_existing_keys[aggregation])
            # First, check for old splits to delete
            if aggregation.timespan:
                for key in list(existing_keys):
                    # NOTE(jd) Only delete if the key is strictly inferior
                    # to the timestamp; we don't delete any timeserie
                    # split that contains our timestamp, so we prefer to
                    # keep a bit more rather than delete too much
                    if key >= oldest_key_to_keep:
                        break
                    deleted_keys.add((key, aggregation))
                    existing_keys.remove(key)

            # Rewrite all read-only splits just for fun (and compression).
            # This only happens if `previous_oldest_mutable_timestamp'
            # exists, which means we already wrote some splits at some
            # point – so this is not the first time we treat this
            # timeserie.
            if need_rewrite:
                for key in existing_keys:
                    if previous_oldest_mutable_key <= key:
                        if key >= oldest_mutable_key:
                            break
                        LOG.debug(
                            "Compressing previous split %s (%s) for "
                            "metric %s", key, aggregation.method, metric)
                        # NOTE(jd) Rewrite it entirely for fun (and later
                        # for compression). For that, we just pass an
                        # empty split.
                        keys_and_split_to_store[(key, aggregation)] = (
                            carbonara.AggregatedTimeSerie(aggregation))

        for key, split in ts.split():
            if key >= oldest_key_to_keep:
                LOG.debug("Storing split %s (%s) for metric %s",
                          key, aggregation.method, metric)
                keys_and_split_to_store[(key, aggregation)] = split

    self._delete_metric_splits(metric, deleted_keys)
    self._store_timeserie_splits(
        metric, keys_and_split_to_store, oldest_mutable_timestamp)