Example #1
    def test_aggregated_different_archive_overlap_edge_missing2(self):
        tsc1 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=10,
                                             aggregation_method='mean')
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
        tsc2 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=10,
                                             aggregation_method='mean')
        tsb2 = carbonara.BoundTimeSerie(block_size=tsc2.sampling)

        tsb1.set_values([
            (datetime.datetime(2014, 1, 1, 12, 3, 0), 4),
        ],
                        before_truncate_callback=tsc1.update)

        tsb2.set_values([
            (datetime.datetime(2014, 1, 1, 11, 0, 0), 4),
            (datetime.datetime(2014, 1, 1, 12, 3, 0), 4),
        ],
                        before_truncate_callback=tsc2.update)

        output = carbonara.AggregatedTimeSerie.aggregated([tsc1, tsc2],
                                                          aggregation='mean')
        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:03:00'), 60.0, 4.0),
        ], output)
Example #2
    def test_aggregated_different_archive_no_overlap2(self):
        tsc1 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=50,
                                             aggregation_method='mean')
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
        tsc2 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=50,
                                             aggregation_method='mean')

        tsb1.set_values([(datetime.datetime(2014, 1, 1, 12, 3, 0), 4)],
                        before_truncate_callback=tsc1.update)
        self.assertRaises(carbonara.UnAggregableTimeseries,
                          carbonara.AggregatedTimeSerie.aggregated,
                          [tsc1, tsc2],
                          aggregation='mean')
Example #3
    def test_fetch_nano(self):
        ts = carbonara.AggregatedTimeSerie(sampling=0.2,
                                           max_size=10,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 11, 46, 0, 200123), 4),
            (datetime.datetime(2014, 1, 1, 11, 46, 0, 340000), 8),
            (datetime.datetime(2014, 1, 1, 11, 47, 0, 323154), 50),
            (datetime.datetime(2014, 1, 1, 11, 48, 0, 590903), 4),
            (datetime.datetime(2014, 1, 1, 11, 48, 0, 903291), 4),
        ],
                       before_truncate_callback=ts.update)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 11, 48, 0, 821312), 5),
        ],
                       before_truncate_callback=ts.update)

        self.assertEqual(
            [(datetime.datetime(2014, 1, 1, 11, 46, 0, 200000), 0.2, 6.0),
             (datetime.datetime(2014, 1, 1, 11, 47, 0, 200000), 0.2, 50.0),
             (datetime.datetime(2014, 1, 1, 11, 48, 0, 400000), 0.2, 4.0),
             (datetime.datetime(2014, 1, 1, 11, 48, 0, 800000), 0.2, 4.5)],
            ts.fetch())
Example #4
    def test_fetch_agg_max(self):
        ts = carbonara.AggregatedTimeSerie(sampling=60,
                                           max_size=60,
                                           aggregation_method='max')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                        (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
                        (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
                        (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
                        (datetime.datetime(2014, 1, 1, 12, 2, 12), 1)],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:00'), 60.0, 3),
            (pandas.Timestamp('2014-01-01 12:01:00'), 60.0, 7),
            (pandas.Timestamp('2014-01-01 12:02:00'), 60.0, 15),
        ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 2, 13), 110)],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:00'), 60.0, 3),
            (pandas.Timestamp('2014-01-01 12:01:00'), 60.0, 7),
            (pandas.Timestamp('2014-01-01 12:02:00'), 60.0, 110),
        ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
Example #5
    def test_fetch_agg_std(self):
        ts = carbonara.AggregatedTimeSerie(sampling=60,
                                           max_size=60,
                                           aggregation_method='std')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                        (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
                        (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
                        (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
                        (datetime.datetime(2014, 1, 1, 12, 2, 12), 1)],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:01:00'), 60.0,
             2.1213203435596424),
            (pandas.Timestamp('2014-01-01 12:02:00'), 60.0,
             9.8994949366116654),
        ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 2, 13), 110)],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:01:00'), 60.0,
             2.1213203435596424),
            (pandas.Timestamp('2014-01-01 12:02:00'), 60.0,
             59.304300012730948),
        ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
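
For reference, the expected values above are sample standard deviations (ddof=1), which is what the asserted constants decode to; a quick standalone check, assuming only numpy is available:

    import numpy

    # The 12:01 bucket holds [4, 7]; the 12:02 bucket holds [15, 1],
    # then [15, 1, 110] once the late point is pushed in.
    assert numpy.isclose(numpy.std([4, 7], ddof=1), 2.1213203435596424)
    assert numpy.isclose(numpy.std([15, 1], ddof=1), 9.8994949366116654)
    assert numpy.isclose(numpy.std([15, 1, 110], ddof=1), 59.304300012730948)
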
Example #6
    def test_74_percentile_serialized(self):
        ts = carbonara.AggregatedTimeSerie(sampling='1Min',
                                           aggregation_method='74pct')
        ts.update(
            carbonara.TimeSerie.from_tuples([
                (datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                (datetime.datetime(2014, 1, 1, 12, 0, 4), 5),
                (datetime.datetime(2014, 1, 1, 12, 0, 9), 6)
            ]))

        self.assertEqual(1, len(ts))
        self.assertEqual(5.48, ts[datetime.datetime(2014, 1, 1, 12, 0, 0)])

        # Serialize and unserialize
        ts = carbonara.AggregatedTimeSerie.unserialize(ts.serialize())

        ts.update(
            carbonara.TimeSerie.from_tuples([
                (datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                (datetime.datetime(2014, 1, 1, 12, 0, 4), 5),
                (datetime.datetime(2014, 1, 1, 12, 0, 9), 6)
            ]))

        self.assertEqual(1, len(ts))
        self.assertEqual(5.48, ts[datetime.datetime(2014, 1, 1, 12, 0, 0)])
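
The 5.48 asserted twice above comes from linear interpolation over the bucket [3, 5, 6]: the 74th-percentile rank is 0.74 * (3 - 1) = 1.48, giving 5 + 0.48 * (6 - 5) = 5.48. A standalone check, assuming numpy:

    import numpy

    # Linearly interpolated percentile, numpy's default method.
    assert numpy.isclose(numpy.percentile([3, 5, 6], 74), 5.48)
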
Example #7
    def _get_splits_and_unserialize(self, metrics_aggregations_keys):
        """Get splits and unserialize them

        :param metrics_aggregations_keys: A dict where keys are
                                          `storage.Metric` and values are
                                          dicts of {Aggregation: [SplitKey]}
                                          to retrieve.
        :return: A dict where keys are `storage.Metric` and values are dicts
                 of the form {aggregation: [`carbonara.AggregatedTimeSerie`]}.
        """
        raw_measures = self._get_splits(metrics_aggregations_keys)
        results = collections.defaultdict(
            lambda: collections.defaultdict(list))
        for metric, aggregations_and_raws in six.iteritems(raw_measures):
            for aggregation, raws in six.iteritems(aggregations_and_raws):
                for key, raw in six.moves.zip(
                        metrics_aggregations_keys[metric][aggregation], raws):
                    try:
                        ts = carbonara.AggregatedTimeSerie.unserialize(
                            raw, key, aggregation)
                    except carbonara.InvalidData:
                        LOG.error(
                            "Data corruption detected for %s "
                            "aggregated `%s' timeserie, granularity "
                            "`%s' around time `%s', ignoring.", metric.id,
                            aggregation.method, key.sampling, key)
                        ts = carbonara.AggregatedTimeSerie(aggregation)
                    results[metric][aggregation].append(ts)
        return results
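
Given the documented return shape, a caller could walk the result like this (a sketch only; the loop variable names are illustrative):

    splits = self._get_splits_and_unserialize(metrics_aggregations_keys)
    for metric, per_aggregation in splits.items():
        for aggregation, timeseries in per_aggregation.items():
            for ts in timeseries:
                # each ts is a carbonara.AggregatedTimeSerie
                ...
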
Example #8
    def _get_splits_and_unserialize(self, metric, keys_and_aggregations):
        """Get splits and unserialize them

        :param metric: The metric to retrieve.
        :param keys_and_aggregations: A list of (SplitKey, Aggregation)
                                      tuples to retrieve.
        :return: A list of AggregatedTimeSerie.
        """
        if not keys_and_aggregations:
            return []
        raw_measures = self._get_measures(metric, keys_and_aggregations)
        results = []
        for (key, aggregation), raw in six.moves.zip(
                keys_and_aggregations, raw_measures):
            try:
                ts = carbonara.AggregatedTimeSerie.unserialize(
                    raw, key, aggregation)
            except carbonara.InvalidData:
                LOG.error("Data corruption detected for %s "
                          "aggregated `%s' timeserie, granularity `%s' "
                          "around time `%s', ignoring.",
                          metric.id, aggregation.method, key.sampling, key)
                ts = carbonara.AggregatedTimeSerie(aggregation)
            results.append(ts)
        return results
Example #9
    def test_aggregated_different_archive_overlap_edge_missing1(self):
        tsc1 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=10,
                                             aggregation_method='mean')
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
        tsc2 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=10,
                                             aggregation_method='mean')
        tsb2 = carbonara.BoundTimeSerie(block_size=tsc2.sampling)

        tsb1.set_values([
            (datetime.datetime(2014, 1, 1, 12, 3, 0), 9),
            (datetime.datetime(2014, 1, 1, 12, 4, 0), 1),
            (datetime.datetime(2014, 1, 1, 12, 5, 0), 2),
            (datetime.datetime(2014, 1, 1, 12, 6, 0), 7),
            (datetime.datetime(2014, 1, 1, 12, 7, 0), 5),
            (datetime.datetime(2014, 1, 1, 12, 8, 0), 3),
        ],
                        before_truncate_callback=tsc1.update)

        tsb2.set_values([
            (datetime.datetime(2014, 1, 1, 11, 0, 0), 6),
            (datetime.datetime(2014, 1, 1, 12, 1, 0), 2),
            (datetime.datetime(2014, 1, 1, 12, 2, 0), 13),
            (datetime.datetime(2014, 1, 1, 12, 3, 0), 24),
            (datetime.datetime(2014, 1, 1, 12, 4, 0), 4),
            (datetime.datetime(2014, 1, 1, 12, 5, 0), 16),
            (datetime.datetime(2014, 1, 1, 12, 6, 0), 12),
        ],
                        before_truncate_callback=tsc2.update)

        # By default we require 100% of the points to overlap,
        # but we allow the last datapoint of the finest
        # granularity to be missing.
        output = carbonara.AggregatedTimeSerie.aggregated([tsc1, tsc2],
                                                          aggregation='sum')

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:03:00'), 60.0, 33.0),
            (pandas.Timestamp('2014-01-01 12:04:00'), 60.0, 5.0),
            (pandas.Timestamp('2014-01-01 12:05:00'), 60.0, 18.0),
            (pandas.Timestamp('2014-01-01 12:06:00'), 60.0, 19.0),
        ], output)
Example #10
def _get_measures_timeserie(storage, ref, granularity, *args, **kwargs):
    agg = ref.metric.archive_policy.get_aggregation(ref.aggregation,
                                                    granularity)
    try:
        data = storage.get_aggregated_measures({ref.metric: [agg]}, *args,
                                               **kwargs)[ref.metric][agg]
    except gnocchi_storage.MetricDoesNotExist:
        data = carbonara.AggregatedTimeSerie(
            carbonara.Aggregation(ref.aggregation, granularity, None))
    return (ref, data)
Example #11
    def test_down_sampling(self):
        ts = carbonara.AggregatedTimeSerie(sampling='5Min',
                                           aggregation_method='mean')
        ts.update(
            carbonara.TimeSerie.from_data([
                datetime.datetime(2014, 1, 1, 12, 0, 0),
                datetime.datetime(2014, 1, 1, 12, 0, 4),
                datetime.datetime(2014, 1, 1, 12, 0, 9)
            ], [3, 5, 7]))
        self.assertEqual(1, len(ts))
        self.assertEqual(5, ts[datetime.datetime(2014, 1, 1, 12, 0, 0)])
Example #12
    def test_before_epoch(self):
        ts = carbonara.AggregatedTimeSerie(sampling='1Min',
                                           aggregation_method='74pct')

        self.assertRaises(
            carbonara.BeforeEpochError, ts.update,
            carbonara.TimeSerie.from_tuples([
                (datetime.datetime(1950, 1, 1, 12, 0, 0), 3),
                (datetime.datetime(2014, 1, 1, 12, 0, 4), 5),
                (datetime.datetime(2014, 1, 1, 12, 0, 9), 6)
            ]))
Example #13
    def _get_measures_timeserie(self,
                                metric,
                                aggregation,
                                granularity,
                                from_timestamp=None,
                                to_timestamp=None):

        # Find the number of points
        for d in metric.archive_policy.definition:
            if d.granularity == granularity:
                points = d.points
                break
        else:
            raise storage.GranularityDoesNotExist(metric, granularity)

        all_keys = None
        try:
            all_keys = self._list_split_keys_for_metric(
                metric, aggregation, granularity)
        except storage.MetricDoesNotExist:
            for d in metric.archive_policy.definition:
                if d.granularity == granularity:
                    return carbonara.AggregatedTimeSerie(
                        sampling=granularity,
                        aggregation_method=aggregation,
                        max_size=d.points)
            raise storage.GranularityDoesNotExist(metric, granularity)

        if from_timestamp:
            from_timestamp = str(
                carbonara.SplitKey.from_timestamp_and_sampling(
                    from_timestamp, granularity))

        if to_timestamp:
            to_timestamp = str(
                carbonara.SplitKey.from_timestamp_and_sampling(
                    to_timestamp, granularity))

        timeseries = list(
            filter(
                lambda x: x is not None,
                self._map_in_thread(
                    self._get_measures_and_unserialize,
                    ((metric, key, aggregation, granularity)
                     for key in sorted(all_keys)
                     if ((not from_timestamp or key >= from_timestamp) and (
                         not to_timestamp or key <= to_timestamp))))))

        return carbonara.AggregatedTimeSerie.from_timeseries(
            sampling=granularity,
            aggregation_method=aggregation,
            timeseries=timeseries,
            max_size=points)
Example #14
    def test_95_percentile(self):
        ts = carbonara.AggregatedTimeSerie(sampling='1Min',
                                           aggregation_method='95pct')
        ts.update(
            carbonara.TimeSerie.from_tuples([
                (datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                (datetime.datetime(2014, 1, 1, 12, 0, 4), 5),
                (datetime.datetime(2014, 1, 1, 12, 0, 9), 6)
            ]))

        self.assertEqual(1, len(ts))
        self.assertEqual(5.9000000000000004,
                         ts[datetime.datetime(2014, 1, 1, 12, 0, 0)])
Example #15
    def test_no_truncation(self):
        ts = carbonara.AggregatedTimeSerie(sampling=60,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie()

        for i in six.moves.range(1, 11):
            tsb.set_values(
                [(datetime.datetime(2014, 1, 1, 12, i, i), float(i))],
                before_truncate_callback=ts.update)
            tsb.set_values(
                [(datetime.datetime(2014, 1, 1, 12, i, i + 1), float(i + 1))],
                before_truncate_callback=ts.update)
            self.assertEqual(i, len(ts.fetch()))
Example #16
    def test_max_size(self):
        ts = carbonara.AggregatedTimeSerie(sampling=1,
                                           max_size=2,
                                           aggregation_method='mean')
        ts.update(
            carbonara.TimeSerie.from_data([
                datetime.datetime(2014, 1, 1, 12, 0, 0),
                datetime.datetime(2014, 1, 1, 12, 0, 4),
                datetime.datetime(2014, 1, 1, 12, 0, 9)
            ], [3, 5, 6]))
        self.assertEqual(2, len(ts))
        self.assertEqual(5, ts[0])
        self.assertEqual(6, ts[1])
Example #17
    def test_to_dict_from_dict(self):
        ts = carbonara.AggregatedTimeSerie(sampling='1Min',
                                           max_size=2,
                                           aggregation_method='max')
        ts.update(
            carbonara.TimeSerie.from_data([
                datetime.datetime(2014, 1, 1, 12, 0, 0),
                datetime.datetime(2014, 1, 1, 12, 1, 4),
                datetime.datetime(2014, 1, 1, 12, 1, 9),
                datetime.datetime(2014, 1, 1, 12, 2, 12)
            ], [3, 5, 7, 1]))
        ts2 = carbonara.AggregatedTimeSerie.from_dict(ts.to_dict())
        self.assertEqual(ts, ts2)
Example #18
    def test_fetch(self):
        ts = carbonara.AggregatedTimeSerie(sampling=60,
                                           max_size=10,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 11, 46, 4), 4),
            (datetime.datetime(2014, 1, 1, 11, 47, 34), 8),
            (datetime.datetime(2014, 1, 1, 11, 50, 54), 50),
            (datetime.datetime(2014, 1, 1, 11, 54, 45), 4),
            (datetime.datetime(2014, 1, 1, 11, 56, 49), 4),
            (datetime.datetime(2014, 1, 1, 11, 57, 22), 6),
            (datetime.datetime(2014, 1, 1, 11, 58, 22), 5),
            (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
            (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
            (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
            (datetime.datetime(2014, 1, 1, 12, 2, 12), 1),
            (datetime.datetime(2014, 1, 1, 12, 3, 0), 3),
            (datetime.datetime(2014, 1, 1, 12, 4, 9), 7),
            (datetime.datetime(2014, 1, 1, 12, 5, 1), 15),
            (datetime.datetime(2014, 1, 1, 12, 5, 12), 1),
            (datetime.datetime(2014, 1, 1, 12, 6, 0, 2), 3),
        ],
                       before_truncate_callback=ts.update)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 6), 5),
        ],
                       before_truncate_callback=ts.update)

        self.assertEqual([(datetime.datetime(2014, 1, 1, 11, 54), 60.0, 4.0),
                          (datetime.datetime(2014, 1, 1, 11, 56), 60.0, 4.0),
                          (datetime.datetime(2014, 1, 1, 11, 57), 60.0, 6.0),
                          (datetime.datetime(2014, 1, 1, 11, 58), 60.0, 5.0),
                          (datetime.datetime(2014, 1, 1, 12, 1), 60.0, 5.5),
                          (datetime.datetime(2014, 1, 1, 12, 2), 60.0, 8.0),
                          (datetime.datetime(2014, 1, 1, 12, 3), 60.0, 3.0),
                          (datetime.datetime(2014, 1, 1, 12, 4), 60.0, 7.0),
                          (datetime.datetime(2014, 1, 1, 12, 5), 60.0, 8.0),
                          (datetime.datetime(2014, 1, 1, 12, 6), 60.0, 4.0)],
                         ts.fetch())

        self.assertEqual([(datetime.datetime(2014, 1, 1, 12, 1), 60.0, 5.5),
                          (datetime.datetime(2014, 1, 1, 12, 2), 60.0, 8.0),
                          (datetime.datetime(2014, 1, 1, 12, 3), 60.0, 3.0),
                          (datetime.datetime(2014, 1, 1, 12, 4), 60.0, 7.0),
                          (datetime.datetime(2014, 1, 1, 12, 5), 60.0, 8.0),
                          (datetime.datetime(2014, 1, 1, 12, 6), 60.0, 4.0)],
                         ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
Example #19
    def test_back_window_ignore(self):
        """Back window testing.

        Test that the back window on an archive is not longer than the
        window we aggregate on.
        """
        ts = carbonara.AggregatedTimeSerie(sampling=1,
                                           max_size=60,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 1, 2300), 1),
            (datetime.datetime(2014, 1, 1, 12, 0, 1, 4600), 2),
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 4500), 3),
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 7800), 4),
            (datetime.datetime(2014, 1, 1, 12, 0, 3, 8), 2.5),
        ],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
            (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
            (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
        ], ts.fetch())

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9),
        ],
                       ignore_too_old_timestamps=True,
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
            (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
            (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
        ], ts.fetch())

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9),
            (datetime.datetime(2014, 1, 1, 12, 0, 3, 9), 4.5),
        ],
                       ignore_too_old_timestamps=True,
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
            (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
            (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 3.5),
        ], ts.fetch())
Example #20
    def test_down_sampling_with_max_size_and_method_max(self):
        ts = carbonara.AggregatedTimeSerie(sampling='1Min',
                                           max_size=2,
                                           aggregation_method='max')
        ts.update(
            carbonara.TimeSerie.from_data([
                datetime.datetime(2014, 1, 1, 12, 0, 0),
                datetime.datetime(2014, 1, 1, 12, 1, 4),
                datetime.datetime(2014, 1, 1, 12, 1, 9),
                datetime.datetime(2014, 1, 1, 12, 2, 12)
            ], [3, 5, 70, 1]))
        self.assertEqual(2, len(ts))
        self.assertEqual(70, ts[datetime.datetime(2014, 1, 1, 12, 1, 0)])
        self.assertEqual(1, ts[datetime.datetime(2014, 1, 1, 12, 2, 0)])
Example #21
    def test_serialize(self):
        ts = carbonara.AggregatedTimeSerie(sampling=0.5,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 0, 1234), 3),
            (datetime.datetime(2014, 1, 1, 12, 0, 0, 321), 6),
            (datetime.datetime(2014, 1, 1, 12, 1, 4, 234), 5),
            (datetime.datetime(2014, 1, 1, 12, 1, 9, 32), 7),
            (datetime.datetime(2014, 1, 1, 12, 2, 12, 532), 1),
        ],
                       before_truncate_callback=ts.update)

        self.assertEqual(
            ts, carbonara.AggregatedTimeSerie.unserialize(ts.serialize()))
Example #22
    def _get_measures_timeserie(self,
                                metric,
                                aggregation,
                                from_timestamp=None,
                                to_timestamp=None):
        try:
            all_keys = self._list_split_keys_for_metric(
                metric, aggregation.method, aggregation.granularity)
        except MetricDoesNotExist:
            return carbonara.AggregatedTimeSerie(
                sampling=aggregation.granularity,
                aggregation_method=aggregation.method)

        if from_timestamp:
            from_timestamp = carbonara.SplitKey.from_timestamp_and_sampling(
                from_timestamp, aggregation.granularity)

        if to_timestamp:
            to_timestamp = carbonara.SplitKey.from_timestamp_and_sampling(
                to_timestamp, aggregation.granularity)

        keys = [
            key for key in sorted(all_keys)
            if ((not from_timestamp or key >= from_timestamp) and (
                not to_timestamp or key <= to_timestamp))
        ]

        timeseries = self._get_measures_and_unserialize(
            metric, keys, aggregation.method)

        ts = carbonara.AggregatedTimeSerie.from_timeseries(
            sampling=aggregation.granularity,
            aggregation_method=aggregation.method,
            timeseries=timeseries)
        # We need to truncate because:
        # - If the driver is not in WRITE_FULL mode, then it might read too
        # much data that will be deleted once the split is rewritten. Just
        # truncate so we don't return it.
        # - If the driver is in WRITE_FULL but the archive policy has been
        # resized, we might still have too many points stored, which will be
        # deleted at a later point when new points are processed.
        # Truncate to be sure we don't return them.
        if aggregation.timespan is not None:
            ts.truncate(aggregation.timespan)
        return ts
Example #23
    def test_aggregated_different_archive_no_overlap2(self):
        tsc1 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 50,
            'agg': 'mean'
        }
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
        tsc2 = carbonara.AggregatedTimeSerie(sampling=numpy.timedelta64(
            60, 's'),
                                             max_size=50,
                                             aggregation_method='mean')

        tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 3, 0), 4)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc1))
        self.assertRaises(cross_metric.UnAggregableTimeseries,
                          cross_metric.aggregated, [tsc1['return'], tsc2],
                          aggregation='mean')
Example #24
    def test_from_timeseries(self):
        sampling = 5
        points = 100000
        ts = carbonara.TimeSerie.from_data(timestamps=map(
            datetime.datetime.utcfromtimestamp, six.moves.range(points)),
                                           values=six.moves.range(points))
        agg = carbonara.AggregatedTimeSerie(sampling=sampling,
                                            aggregation_method='mean')
        agg.update(ts)

        split = [t[1] for t in list(agg.split())]

        self.assertEqual(
            agg,
            carbonara.AggregatedTimeSerie.from_timeseries(
                split,
                sampling=agg.sampling,
                max_size=agg.max_size,
                aggregation_method=agg.aggregation_method))
Example #25
    def test_split(self):
        sampling = 5
        points = 100000
        ts = carbonara.TimeSerie.from_data(timestamps=map(
            datetime.datetime.utcfromtimestamp, six.moves.range(points)),
                                           values=six.moves.range(points))
        agg = carbonara.AggregatedTimeSerie(sampling=sampling,
                                            aggregation_method='mean')
        agg.update(ts)

        grouped_points = list(agg.split())

        self.assertEqual(
            math.ceil((points / float(sampling)) /
                      carbonara.AggregatedTimeSerie.POINTS_PER_SPLIT),
            len(grouped_points))
        self.assertEqual("0.0", grouped_points[0][0])
        # 14400 × 5s = 20 hours
        self.assertEqual("72000.0", grouped_points[1][0])
        self.assertEqual(carbonara.AggregatedTimeSerie.POINTS_PER_SPLIT,
                         len(grouped_points[0][1]))
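
The assertions above are plain arithmetic once POINTS_PER_SPLIT is known; the in-test comment implies it is 14400 here (verify against your carbonara version):

    import math

    points, sampling, per_split = 100000, 5, 14400
    buckets = points / float(sampling)          # 20000 aggregated points
    assert math.ceil(buckets / per_split) == 2  # hence two splits
    assert per_split * sampling == 72000        # second split key is "72000.0"
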
Example #26
    def test_back_window(self):
        """Back window testing.

        Test that the back window on an archive is not longer than the
        window we aggregate on.
        """
        ts = carbonara.AggregatedTimeSerie(sampling=1,
                                           max_size=60,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 1, 2300), 1),
            (datetime.datetime(2014, 1, 1, 12, 0, 1, 4600), 2),
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 4500), 3),
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 7800), 4),
            (datetime.datetime(2014, 1, 1, 12, 0, 3, 8), 2.5),
        ],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
            (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
            (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
        ], ts.fetch())

        try:
            tsb.set_values([
                (datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9),
            ])
        except carbonara.NoDeloreanAvailable as e:
            self.assertEqual(
                six.text_type(e),
                u"2014-01-01 12:00:02.000099 is before 2014-01-01 12:00:03")
            self.assertEqual(datetime.datetime(2014, 1, 1, 12, 0, 2, 99),
                             e.bad_timestamp)
            self.assertEqual(datetime.datetime(2014, 1, 1, 12, 0, 3),
                             e.first_timestamp)
        else:
            self.fail("No exception raised")
Example #27
    def test_fetch_agg_pct(self):
        ts = carbonara.AggregatedTimeSerie(sampling=1,
                                           max_size=3600 * 24,
                                           aggregation_method='90pct')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                        (datetime.datetime(2014, 1, 1, 12, 0, 0, 123), 4),
                        (datetime.datetime(2014, 1, 1, 12, 0, 2), 4)],
                       before_truncate_callback=ts.update)

        result = ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0))
        reference = [(pandas.Timestamp('2014-01-01 12:00:00'), 1.0, 3.9),
                     (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 4)]

        self.assertEqual(len(reference), len(result))

        for ref, res in zip(reference, result):
            self.assertEqual(ref[0], res[0])
            self.assertEqual(ref[1], res[1])
            # Rounding \o/
            self.assertAlmostEqual(ref[2], res[2])

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 2, 113), 110)],
                       before_truncate_callback=ts.update)

        result = ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0))
        reference = [(pandas.Timestamp('2014-01-01 12:00:00'), 1.0, 3.9),
                     (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 99.4)]

        self.assertEqual(len(reference), len(result))

        for ref, res in zip(reference, result):
            self.assertEqual(ref[0], res[0])
            self.assertEqual(ref[1], res[1])
            # Rounding \o/
            self.assertAlmostEqual(ref[2], res[2])
Example #28
    def _compute_split_operations(self, metric, aggregations_and_timeseries,
                                  previous_oldest_mutable_timestamp,
                                  oldest_mutable_timestamp):
        """Compute changes to a metric and return operations to be done.

        Based on an aggregations list and a grouped timeseries, this computes
        what needs to be deleted and stored for a metric and returns it.

        :param metric: The metric
        :param aggregations_and_timeseries: A dictionary of timeseries of the
                                            form {aggregation: timeseries}.
        :param previous_oldest_mutable_timestamp: The previous oldest storable
                                                  timestamp from the previous
                                                  backwindow.
        :param oldest_mutable_timestamp: The current oldest storable timestamp
                                         from the current backwindow.
        :return: A tuple (keys_to_delete, keys_to_store) where keys_to_delete
                 is a set of (`carbonara.SplitKey`, `carbonara.Aggregation`)
                 tuples to delete and where keys_to_store is a dictionary of
                 the form {(key, aggregation): aggts} where key is a
                 `carbonara.SplitKey` and aggts a
                 `carbonara.AggregatedTimeSerie` to be serialized.
        """
        # We only need to check for rewrites if the driver is not in
        # WRITE_FULL mode and if we already stored splits once.
        need_rewrite = (not self.WRITE_FULL
                        and previous_oldest_mutable_timestamp is not None)

        aggregations_needing_list_of_keys = set()
        oldest_values = {}

        for aggregation, ts in six.iteritems(aggregations_and_timeseries):
            # Don't do anything if the timeseries is empty
            if not ts:
                continue

            agg_oldest_values = {
                'oldest_point_to_keep': ts.truncate(aggregation.timespan)
                if aggregation.timespan else None,
                'prev_oldest_mutable_key': None,
                'oldest_mutable_key': None,
            }

            if previous_oldest_mutable_timestamp and (aggregation.timespan
                                                      or need_rewrite):
                previous_oldest_mutable_key = ts.get_split_key(
                    previous_oldest_mutable_timestamp)
                oldest_mutable_key = ts.get_split_key(oldest_mutable_timestamp)

                # Only clean up if there is a new object, as there must be a
                # new object for an old object to be cleaned up.
                if previous_oldest_mutable_key != oldest_mutable_key:
                    aggregations_needing_list_of_keys.add(aggregation)
                    agg_oldest_values['prev_oldest_mutable_key'] = (
                        previous_oldest_mutable_key)
                    agg_oldest_values['oldest_mutable_key'] = (
                        oldest_mutable_key)

            oldest_values[aggregation.granularity] = agg_oldest_values

        all_existing_keys = self._list_split_keys(
            {metric: aggregations_needing_list_of_keys})[metric]

        # NOTE(jd) This dict uses (key, aggregation) tuples as keys because
        # using just (key) would not carry the aggregation method and therefore
        # would not be unique per aggregation!
        keys_and_split_to_store = {}
        deleted_keys = set()

        for aggregation, ts in six.iteritems(aggregations_and_timeseries):
            # Don't do anything if the timeseries is empty
            if not ts:
                continue

            agg_oldest_values = oldest_values[aggregation.granularity]

            oldest_key_to_keep = ts.get_split_key(
                agg_oldest_values['oldest_point_to_keep'])

            # If we listed the keys for the aggregation, that's because we need
            # to check for cleanup and/or rewrite
            if aggregation in all_existing_keys:
                # FIXME(jd) This should be sorted by the driver and asserted it
                # is in tests. It's likely backends already sort anyway.
                existing_keys = sorted(all_existing_keys[aggregation])
                # First, check for old splits to delete
                if aggregation.timespan:
                    for key in list(existing_keys):
                        # NOTE(jd) Only delete if the key is strictly
                        # less than the timestamp; we don't delete any
                        # timeserie split that contains our timestamp, so
                        # we prefer keeping a bit too much rather than
                        # deleting too much.
                        if key >= oldest_key_to_keep:
                            break
                        deleted_keys.add((key, aggregation))
                        existing_keys.remove(key)

                # Rewrite all read-only splits just for fun (and
                # compression). This only happens if
                # `previous_oldest_mutable_timestamp' exists, which means
                # we already wrote some splits at some point – so this is
                # not the first time we treat this timeserie.
                if need_rewrite:
                    for key in existing_keys:
                        if agg_oldest_values['prev_oldest_mutable_key'] <= key:
                            if key >= agg_oldest_values['oldest_mutable_key']:
                                break
                            LOG.debug(
                                "Compressing previous split %s (%s) for "
                                "metric %s", key, aggregation.method, metric)
                            # NOTE(jd) Rewrite it entirely for fun (and
                            # later for compression). For that, we just
                            # pass an empty split.
                            keys_and_split_to_store[(key, aggregation)] = (
                                carbonara.AggregatedTimeSerie(aggregation))

            for key, split in ts.split():
                if key >= oldest_key_to_keep:
                    LOG.debug("Storing split %s (%s) for metric %s", key,
                              aggregation.method, metric)
                    keys_and_split_to_store[(key, aggregation)] = split

        return (deleted_keys, keys_and_split_to_store)
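
A minimal sketch of how a driver might consume the returned tuple, borrowing the storage hooks that appear in Example #30 below (their exact signatures may vary between versions); this would run inside another driver method:

    keys_to_delete, keys_to_store = self._compute_split_operations(
        metric, aggregations_and_timeseries,
        previous_oldest_mutable_timestamp, oldest_mutable_timestamp)
    # Drop obsolete splits first, then serialize and store the new ones.
    self._delete_metric_splits(metric, keys_to_delete)
    self._store_timeserie_splits(metric, keys_to_store,
                                 oldest_mutable_timestamp)
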
Example #29
    def _get_measures_timeserie(self,
                                metric,
                                aggregation,
                                granularity,
                                from_timestamp=None,
                                to_timestamp=None):

        # Find the number of points
        for d in metric.archive_policy.definition:
            if d.granularity == granularity:
                points = d.points
                break
        else:
            raise storage.GranularityDoesNotExist(metric, granularity)

        all_keys = None
        try:
            all_keys = self._list_split_keys_for_metric(
                metric, aggregation, granularity)
        except storage.MetricDoesNotExist:
            # This can happen if it's an old metric with a TimeSerieArchive
            all_keys = None

        if not all_keys:
            # It does not mean we have no data: it can be an old metric with a
            # TimeSerieArchive.
            try:
                data = self._get_metric_archive(metric, aggregation)
            except (storage.MetricDoesNotExist,
                    storage.AggregationDoesNotExist):
                # It really does not exist
                for d in metric.archive_policy.definition:
                    if d.granularity == granularity:
                        return carbonara.AggregatedTimeSerie(
                            sampling=granularity,
                            aggregation_method=aggregation,
                            max_size=d.points)
                raise storage.GranularityDoesNotExist(metric, granularity)
            else:
                archive = carbonara.TimeSerieArchive.unserialize(data)
                # It's an old metric with a TimeSerieArchive!
                for ts in archive.agg_timeseries:
                    if ts.sampling == granularity:
                        return ts
                raise storage.GranularityDoesNotExist(metric, granularity)

        if from_timestamp:
            from_timestamp = carbonara.AggregatedTimeSerie.get_split_key(
                from_timestamp, granularity)

        if to_timestamp:
            to_timestamp = carbonara.AggregatedTimeSerie.get_split_key(
                to_timestamp, granularity)

        timeseries = filter(
            lambda x: x is not None,
            self._map_in_thread(
                self._get_measures_and_unserialize,
                ((metric, key, aggregation, granularity) for key in all_keys
                 if ((not from_timestamp or key >= from_timestamp) and (
                     not to_timestamp or key <= to_timestamp)))))

        return carbonara.AggregatedTimeSerie.from_timeseries(
            sampling=granularity,
            aggregation_method=aggregation,
            timeseries=timeseries,
            max_size=points)
Example #30
    def _add_measures(self, metric, aggregations, grouped_serie,
                      previous_oldest_mutable_timestamp,
                      oldest_mutable_timestamp):
        # We only need to check for rewrites if the driver is not in
        # WRITE_FULL mode and if we already stored splits once.
        need_rewrite = (
            not self.WRITE_FULL
            and previous_oldest_mutable_timestamp is not None
        )

        timeseries = {}
        aggregations_needing_list_of_keys = set()

        for aggregation in aggregations:
            ts = carbonara.AggregatedTimeSerie.from_grouped_serie(
                grouped_serie, aggregation)

            # Don't do anything if the timeserie is empty
            if not ts:
                continue
            # Otherwise, store it for the next iteration
            timeseries[aggregation] = ts

            if aggregation.timespan:
                oldest_point_to_keep = ts.truncate(aggregation.timespan)
            else:
                oldest_point_to_keep = None

            if previous_oldest_mutable_timestamp and (aggregation.timespan or
                                                      need_rewrite):
                previous_oldest_mutable_key = ts.get_split_key(
                    previous_oldest_mutable_timestamp)
                oldest_mutable_key = ts.get_split_key(oldest_mutable_timestamp)

                # Only clean up if there is a new object, as there must be a
                # new object for an old object to be cleaned up.
                if previous_oldest_mutable_key != oldest_mutable_key:
                    aggregations_needing_list_of_keys.add(aggregation)

        all_existing_keys = self._list_split_keys(
            metric, aggregations_needing_list_of_keys)

        # NOTE(jd) This dict uses (key, aggregation) tuples as keys because
        # using just (key) would not carry the aggregation method and therefore
        # would not be unique per aggregation!
        keys_and_split_to_store = {}
        deleted_keys = set()

        for aggregation, ts in six.iteritems(timeseries):
            oldest_key_to_keep = ts.get_split_key(oldest_point_to_keep)

            # If we listed the keys for the aggregation, that's because we need
            # to check for cleanup and/or rewrite
            if aggregation in all_existing_keys:
                # FIXME(jd) This should be sorted by the driver and asserted it
                # is in tests. It's likely backends already sort anyway.
                existing_keys = sorted(all_existing_keys[aggregation])
                # First, check for old splits to delete
                if aggregation.timespan:
                    for key in list(existing_keys):
                        # NOTE(jd) Only delete if the key is strictly
                        # less than the timestamp; we don't delete any
                        # timeserie split that contains our timestamp, so
                        # we prefer keeping a bit too much rather than
                        # deleting too much.
                        if key >= oldest_key_to_keep:
                            break
                        deleted_keys.add((key, aggregation))
                        existing_keys.remove(key)

                # Rewrite all read-only splits just for fun (and
                # compression). This only happens if
                # `previous_oldest_mutable_timestamp' exists, which means
                # we already wrote some splits at some point – so this is
                # not the first time we treat this timeserie.
                if need_rewrite:
                    for key in existing_keys:
                        if previous_oldest_mutable_key <= key:
                            if key >= oldest_mutable_key:
                                break
                            LOG.debug(
                                "Compressing previous split %s (%s) for "
                                "metric %s", key, aggregation.method,
                                metric)
                            # NOTE(jd) Rewrite it entirely for fun (and
                            # later for compression). For that, we just
                            # pass an empty split.
                            keys_and_split_to_store[(key, aggregation)] = (
                                carbonara.AggregatedTimeSerie(aggregation))

            for key, split in ts.split():
                if key >= oldest_key_to_keep:
                    LOG.debug(
                        "Storing split %s (%s) for metric %s",
                        key, aggregation.method, metric)
                    keys_and_split_to_store[(key, aggregation)] = split

        self._delete_metric_splits(metric, deleted_keys)
        self._store_timeserie_splits(
            metric, keys_and_split_to_store, oldest_mutable_timestamp)