示例#1
0
    def test_aggregate_deep_path(self):
        """Check that aggregate() works with a deep (dotted) field path."""

        def run_aggregation(field_spec):
            """Flush DEEP_EVENT_LIST through aggregate() and return the event list."""
            source = TimeSeries(dict(name='events', events=DEEP_EVENT_LIST))
            flushed = Pipeline().from_source(source).emit_on('flush')
            return flushed.aggregate(field_spec).to_event_list()

        # one output column
        single = run_aggregation({'out_max': {'direction.out': Functions.max()}})
        self.assertEqual(single[0].get('out_max'), 4)

        # several output columns computed by a single aggregate() call
        multi = run_aggregation({
            'in_max': {'direction.in': Functions.max()},
            'out_max': {'direction.out': Functions.max()},
        })
        self.assertEqual(multi[0].get('out_max'), 4)
        self.assertEqual(multi[0].get('in_max'), 8)
示例#2
0
    def test_sum_and_find_max(self):
        """Collapse 'in' and 'out' into a total, then aggregate the largest total."""

        series = TimeSeries(IN_OUT_DATA)

        def check_max(event):
            """Callback handed to EventOut: verify the aggregated maximum."""
            self.assertEqual(event.get('max_total'), 117)

        def build_chain():
            """Return the collapse -> aggregate chain with no output attached yet."""
            summed = Pipeline().from_source(series).emit_on('flush').collapse(
                ['in', 'out'], 'total', Functions.sum())
            return summed.aggregate({'max_total': {'total': Functions.max()}})

        # result delivered through the callback
        build_chain().to(EventOut, check_max)

        # same chain, result collected as an event list
        results = build_chain().to_event_list()

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0].get('max_total'), 117)
示例#3
0
    def test_fixed_window(self):
        """Exercise fixed_window_rollup() and hourly_rollup() with an avg reducer."""

        def avg_spec():
            """Build a fresh field spec averaging 'value' into 'value'."""
            return {'value': {'value': Functions.avg()}}

        daily = TimeSeries(SEPT_2014_DATA).fixed_window_rollup('1d', avg_spec())

        self.assertEqual(daily.size(), 5)
        for position, expected in ((0, 46.875), (2, 54.083333333333336), (4, 51.85)):
            self.assertEqual(daily.at(position).value(), expected)

        # Not really a rollup: the size check below expects one aggregation
        # index per data point.
        hourly = TimeSeries(SEPT_2014_DATA).hourly_rollup(avg_spec())

        self.assertEqual(hourly.size(), len(SEPT_2014_DATA.get('points')))
        for position, expected in ((0, 80.0), (2, 52.0), (4, 26.0)):
            self.assertEqual(hourly.at(position).value(), expected)
示例#4
0
    def test_aggregate_deep_path(self):
        """Verify aggregation over deep field paths such as 'direction.out'."""

        series = TimeSeries(dict(name='events', events=DEEP_EVENT_LIST))
        flushed = Pipeline().from_source(series).emit_on('flush')
        out_only = flushed.aggregate({'out_max': {'direction.out': Functions.max()}})
        events = out_only.to_event_list()

        self.assertEqual(events[0].get('out_max'), 4)

        # Repeat with a field spec that aggregates multiple columns at once.
        series = TimeSeries(dict(name='events', events=DEEP_EVENT_LIST))
        flushed = Pipeline().from_source(series).emit_on('flush')
        in_and_out = flushed.aggregate(
            dict(
                in_max={'direction.in': Functions.max()},
                out_max={'direction.out': Functions.max()},
            )
        )
        events = in_and_out.to_event_list()

        first = events[0]
        self.assertEqual(first.get('out_max'), 4)
        self.assertEqual(first.get('in_max'), 8)
示例#5
0
    def test_non_fixed_rollups(self):
        """Check daily/monthly/yearly rollup indexes against the Index string helpers."""

        series = TimeSeries(SEPT_2014_DATA)

        # each rollup method paired with the helper that names its first index
        rollups = (
            (series.daily_rollup, Index.get_daily_index_string),
            (series.monthly_rollup, Index.get_monthly_index_string),
            (series.yearly_rollup, Index.get_yearly_index_string),
        )

        # record=True just keeps any warnings quiet; they are never inspected.
        with warnings.catch_warnings(record=True):
            first_ts = SEPT_2014_DATA.get('points')[0][0]

            for rollup, index_string in rollups:
                averaged = rollup(dict(value=dict(value=Functions.avg())))
                self.assertEqual(
                    index_string(dt_from_ms(first_ts), utc=False),
                    averaged.at(0).index().to_string()
                )
示例#6
0
    def test_non_fixed_rollups(self):
        """Run the calendar rollups with utc=False and compare their first index."""

        series = TimeSeries(SEPT_2014_DATA)

        # Warnings are captured only to silence them; nothing reads the record.
        with warnings.catch_warnings(record=True):

            by_day = series.daily_rollup({'value': {'value': Functions.avg()}})

            first_ms = SEPT_2014_DATA.get('points')[0][0]

            expected_day = Index.get_daily_index_string(dt_from_ms(first_ms), utc=False)
            self.assertEqual(expected_day, by_day.at(0).index().to_string())

            by_month = series.monthly_rollup({'value': {'value': Functions.avg()}})

            expected_month = Index.get_monthly_index_string(dt_from_ms(first_ms), utc=False)
            self.assertEqual(expected_month, by_month.at(0).index().to_string())

            by_year = series.yearly_rollup({'value': {'value': Functions.avg()}})

            expected_year = Index.get_yearly_index_string(dt_from_ms(first_ms), utc=False)
            self.assertEqual(expected_year, by_year.at(0).index().to_string())
示例#7
0
    def test_fixed_window(self):
        """Roll SEPT_2014_DATA up by fixed 1d windows and by hour, averaging 'value'."""

        sampled = (0, 2, 4)

        series = TimeSeries(SEPT_2014_DATA)
        per_day = series.fixed_window_rollup('1d', {'value': {'value': Functions.avg()}})

        self.assertEqual(per_day.size(), 5)
        day_values = [per_day.at(pos).value() for pos in sampled]
        self.assertEqual(day_values, [46.875, 54.083333333333336, 51.85])

        # Not really a rollup here: the size assertion expects as many
        # aggregation indexes as there are data points.
        series = TimeSeries(SEPT_2014_DATA)
        per_hour = series.hourly_rollup({'value': {'value': Functions.avg()}})

        self.assertEqual(per_hour.size(), len(SEPT_2014_DATA.get('points')))
        hour_values = [per_hour.at(pos).value() for pos in sampled]
        self.assertEqual(hour_values, [80.0, 52.0, 26.0])
示例#8
0
    def test_aggregate_and_conversion(self):
        """Average events per 1h window and convert the output to TimeRangeEvent."""

        # (hour, minute, in, out) for each event on 2015-03-14
        samples = (
            (1, 57, 3, 1),
            (1, 58, 9, 2),
            (1, 59, 6, 6),
            (2, 0, 4, 7),
            (2, 1, 5, 9),
        )
        events_in = [
            Event(
                aware_dt_from_args(dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
                {'in': val_in, 'out': val_out}
            )
            for hour, minute, val_in, val_out in samples
        ]

        def store_result(event):
            """Keep each emitted event in RESULTS, keyed by its timestamp in ms."""
            global RESULTS  # pylint: disable=global-statement
            if RESULTS is None:
                RESULTS = dict()
            key = '{0}'.format(ms_from_dt(event.timestamp()))
            RESULTS[key] = event

        uin = Stream()

        averages = {
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        }
        windowed = Pipeline().from_source(uin).window_by('1h').emit_on('eachEvent')
        converted = windowed.aggregate(averages).as_time_range_events(dict(alignment='lag'))
        converted.to(EventOut, store_result)

        for event in events_in:
            uin.add_event(event)

        # (result key, column, expected average)
        expectations = (
            ('1426294800000', 'in_avg', 6),
            ('1426294800000', 'out_avg', 3),
            ('1426298400000', 'in_avg', 4.5),
            ('1426298400000', 'out_avg', 8),
        )
        for key, column, expected in expectations:
            self.assertEqual(RESULTS.get(key).get(column), expected)
示例#9
0
    def test_windowed_average(self):
        """Aggregate streamed events into per-window averages (1h windows)."""

        def event_at(hour, minute, in_val, out_val):
            """Build an Event on 2015-03-14 at hour:minute carrying in/out values."""
            when = aware_dt_from_args(dict(year=2015, month=3, day=14, hour=hour, minute=minute))
            return Event(when, {'in': in_val, 'out': out_val})

        events_in = [
            event_at(7, 57, 3, 1),
            event_at(7, 58, 9, 2),
            event_at(7, 59, 6, 6),
            event_at(8, 0, 4, 7),
            event_at(8, 1, 5, 9),
        ]

        def remember(event):
            """Keep each emitted event in RESULTS, keyed by its index string."""
            global RESULTS  # pylint: disable=global-statement
            if RESULTS is None:
                RESULTS = dict()
            index_key = '{0}'.format(event.index())
            RESULTS[index_key] = event

        uin = Stream()

        hourly = Pipeline().from_source(uin).window_by('1h').emit_on('eachEvent')
        hourly.aggregate(
            dict(
                in_avg={'in': Functions.avg()},
                out_avg={'out': Functions.avg()},
            )
        ).to(EventOut, remember)

        for incoming in events_in:
            uin.add_event(incoming)

        first_window = '1h-396199'
        second_window = '1h-396200'

        self.assertEqual(RESULTS.get(first_window).get('in_avg'), 6)
        self.assertEqual(RESULTS.get(first_window).get('out_avg'), 3)
        self.assertEqual(RESULTS.get(second_window).get('in_avg'), 4.5)
        self.assertEqual(RESULTS.get(second_window).get('out_avg'), 8)
示例#10
0
    def test_event_collapse(self):
        """Event.collapse() with and without appending to the existing fields."""

        source = self._create_event(self.aware_ts, {'a': 5, 'b': 6, 'c': 7})

        # (append flag, number of data keys expected afterwards)
        for append, key_count in ((True, 4), (False, 1)):
            collapsed = source.collapse(['a', 'c'], 'a_to_c', Functions.sum(), append=append)
            self.assertEqual(len(list(collapsed.data().keys())), key_count)
            self.assertEqual(collapsed.get('a_to_c'), 12)
示例#11
0
    def test_event_collapse(self):
        """Collapse columns 'a' and 'c' of one event into 'a_to_c' using sum."""

        original = self._create_event(self.aware_ts, {'a': 5, 'b': 6, 'c': 7})

        # append=True: four data keys are expected afterwards
        appended = original.collapse(['a', 'c'], 'a_to_c', Functions.sum(), append=True)
        appended_keys = list(appended.data().keys())
        self.assertEqual(len(appended_keys), 4)
        self.assertEqual(appended.get('a_to_c'), 12)

        # append=False: only one data key is expected afterwards
        replaced = original.collapse(['a', 'c'], 'a_to_c', Functions.sum(), append=False)
        replaced_keys = list(replaced.data().keys())
        self.assertEqual(len(replaced_keys), 1)
        self.assertEqual(replaced.get('a_to_c'), 12)
示例#12
0
    def test_sum_events_with_combine(self):
        """Sum, average and combine several events that share one timestamp."""

        payloads = (
            {'a': 5, 'b': 6, 'c': 7},
            {'a': 2, 'b': 3, 'c': 4},
            {'a': 1, 'b': 2, 'c': 3},
        )
        events = [self._create_event(self.aware_ts, data) for data in payloads]

        # no field spec: every column is summed
        totals = Event.sum(events)[0]
        for column, expected in (('a', 8), ('b', 11), ('c', 14)):
            self.assertEqual(totals.get(column), expected)

        # a single column named as a string
        only_a = Event.sum(events, 'a')[0]
        self.assertEqual(only_a.get('a'), 8)
        self.assertIsNone(only_a.get('b'))
        self.assertIsNone(only_a.get('c'))

        # several columns named in a list
        a_and_c = Event.sum(events, ['a', 'c'])[0]
        self.assertEqual(a_and_c.get('a'), 8)
        self.assertIsNone(a_and_c.get('b'))
        self.assertEqual(a_and_c.get('c'), 14)

        # average of column 'c' once a fourth event is added
        extra = self._create_event(self.aware_ts, {'a': 1, 'b': 1, 'c': 2})
        averaged = Event.avg(events + [extra], 'c')
        self.assertEqual(averaged[0].get('c'), 4)

        # an empty event list produces an empty result
        self.assertEqual(Event.sum([]), [])
        self.assertEqual(Event.avg([]), [])

        # work the extra reducer functions in the Functions module
        reducer_checks = (
            (Functions.max, 7),
            (Functions.min, 3),
            (Functions.count, 3),
            (Functions.first, 7),
            (Functions.last, 3),
            (Functions.difference, 4),
        )
        for make_reducer, expected in reducer_checks:
            combined = Event.combine(events, 'c', make_reducer())
            self.assertEqual(combined[0].get('c'), expected)

        # first/last reducers give None for an empty list
        self.assertIsNone(Functions.first()([]))
        self.assertIsNone(Functions.last()([]))
示例#13
0
    def test_bad_args(self):
        """Provoke the warnings and exceptions raised for bad Pipeline usage."""

        unbounded = Stream()

        # window_by() with utc=False is expected to emit one PipelineWarning
        with warnings.catch_warnings(record=True) as caught:
            Pipeline().from_source(unbounded).window_by('1h', utc=False)
            self.assertEqual(len(caught), 1)
            self.assertTrue(issubclass(caught[0].category, PipelineWarning))

        # a dict is not an acceptable source
        with self.assertRaises(PipelineException):
            Pipeline().from_source(dict())

        # output requested before any source was set
        with self.assertRaises(PipelineException):
            Pipeline().to_keyed_collections()

        # an unbounded source cannot be iterated
        with self.assertRaises(PipelineIOException):
            list(unbounded.events())

        # unknown emit_on type
        with self.assertRaises(PipelineIOException):
            series = TimeSeries(dict(name='events', events=DEEP_EVENT_LIST))
            bogus = Pipeline().from_source(series).emit_on('BOGUS')
            bogus.aggregate({'max_in': {'direction.in': Functions.max()}}).to_event_list()
示例#14
0
    def test_multiple_collapse_chains(self):
        """Chain two collapse() stages (sum, then max) over the same columns."""
        series = TimeSeries(IN_OUT_DATA)

        kcol = (
            Pipeline()
            .from_source(series)
            .collapse(['in', 'out'], 'in_out_sum', Functions.sum())
            .collapse(['in', 'out'], 'in_out_max', Functions.max())
            .emit_on('flush')
            .to_keyed_collections()
        )

        # expected values for the first three events, in order
        for position, expected_sum in enumerate((117, 110, 108)):
            self.assertEqual(kcol.get('all').at(position).get('in_out_sum'), expected_sum)

        for position, expected_max in enumerate((80, 88, 56)):
            self.assertEqual(kcol.get('all').at(position).get('in_out_max'), expected_max)
示例#15
0
    def test_bad_args(self):
        """Trigger the warning and exception paths of Pipeline and Stream."""

        stream = Stream()

        with warnings.catch_warnings(record=True) as recorded:
            Pipeline().from_source(stream).window_by('1h', utc=False)
            warning_count = len(recorded)
            self.assertEqual(warning_count, 1)
            self.assertTrue(issubclass(recorded[0].category, PipelineWarning))

        # from_source() given something that is not a valid source
        with self.assertRaises(PipelineException):
            Pipeline().from_source(dict())

        # no source at all
        with self.assertRaises(PipelineException):
            Pipeline().to_keyed_collections()

        # iterating the events of an unbounded source
        with self.assertRaises(PipelineIOException):
            list(stream.events())

        # emit_on() given an unrecognized type
        with self.assertRaises(PipelineIOException):
            (
                Pipeline()
                .from_source(TimeSeries(dict(name='events', events=DEEP_EVENT_LIST)))
                .emit_on('BOGUS')
                .aggregate(dict(max_in={'direction.in': Functions.max()}))
                .to_event_list()
            )
示例#16
0
    def test_aggregation_filtering(self):
        """Test the filtering modifiers accepted by the TimeSeries aggregation methods.

        The series holds three events whose 'out' values are 2, None and 6; the
        expected results below show how each filter treats the missing value.
        """

        event_objects = [
            Event(1429673400000, {'in': 1, 'out': 2}),
            Event(1429673460000, {'in': 3, 'out': None}),
            Event(1429673520000, {'in': 5, 'out': 6}),
        ]

        series = TimeSeries(dict(name='events', events=event_objects))

        self.assertEqual(series.sum('out', Filters.ignore_missing), 8)
        self.assertEqual(series.avg('out', Filters.ignore_missing), 4)
        self.assertEqual(series.min('out', Filters.zero_missing), 0)
        # A missing result is the None singleton, so check identity; this also
        # matches the assertIsNone() used for the empty-input case below.
        self.assertIsNone(series.max('out', Filters.propagate_missing))
        self.assertEqual(series.mean('out', Filters.ignore_missing), 4)
        self.assertEqual(series.median('out', Filters.zero_missing), 2)
        self.assertEqual(series.stdev('out', Filters.zero_missing), 2.494438257849294)

        # avg built with none_if_empty is expected to give None for an empty list
        avg_f = Functions.avg(Filters.none_if_empty)
        self.assertIsNone(avg_f([]))

        def bad_filtering_function():  # pylint: disable=missing-docstring
            pass

        # an arbitrary function passed as the filter must raise FilterException
        with self.assertRaises(FilterException):
            series.sum('out', bad_filtering_function)
示例#17
0
    def test_various_bad_args(self):
        """Confirm that aggregate() raises CollectionException for bad arguments."""

        series = TimeSeries(DATA)

        # (positional arguments for aggregate(),) - each must be rejected
        bad_calls = (
            lambda: series.aggregate(dict()),
            lambda: series.aggregate(Functions.sum(), dict()),
        )

        for bad_call in bad_calls:
            with self.assertRaises(CollectionException):
                bad_call()
示例#18
0
    def test_ts_collapse(self):
        """Test TimeSeries.collapse(): 'in_out_sum' must equal 'in' + 'out' per event."""
        summed_series = self._canned_event_series.collapse(
            ['in', 'out'], 'in_out_sum', Functions.sum())

        for event in summed_series.events():
            expected_total = event.get('in') + event.get('out')
            self.assertEqual(expected_total, event.get('in_out_sum'))
示例#19
0
    def test_underlying_methods(self):
        """Call the TimeSeries aggregation shortcuts and check their results."""

        self.assertEqual(self._canned_event_series.count(), len(EVENT_LIST))

        series = self._canned_event_series

        # (bound method, column, expected result)
        checks = (
            (series.sum, 'in', 9),
            (series.avg, 'out', 4),
            (series.mean, 'out', 4),
            (series.min, 'in', 1),
            (series.max, 'in', 5),
            (series.median, 'out', 4),
            (series.stdev, 'out', 1.632993161855452),
        )
        for method, column, expected in checks:
            self.assertEqual(method(column), expected)

        # redundant, but for coverage: column given as a string and as a tuple
        for field_spec in ('in', ('in',)):
            self.assertEqual(series.aggregate(Functions.sum(), field_spec), 9)

        # aggregate() without a field spec
        data_series = TimeSeries(DATA)
        self.assertEqual(data_series.aggregate(Functions.sum()), 189)
示例#20
0
    def test_various_bad_args(self):
        """Make sure aggregate() raises CollectionException on bad input."""

        series = TimeSeries(DATA)

        # a dict as the only argument
        self.assertRaises(CollectionException, series.aggregate, dict())

        # a dict as the second argument, after a valid reducer
        self.assertRaises(CollectionException, series.aggregate, Functions.sum(), dict())
示例#21
0
    def test_event_map_function_arg_and_reduce(self):  # pylint: disable=invalid-name
        """Test Event.map() with a custom function and Event.reduce().

        map_sum() turns each event into ``{'sum': in + out}``. The assertions
        expect Event.map() to gather those values per key and Event.reduce()
        to fold each gathered list with the supplied reducer.
        """
        def map_sum(event):  # pylint: disable=missing-docstring
            return dict(sum=event.get('in') + event.get('out'))

        result = Event.map(self._get_event_series(), map_sum)
        # Compare whole mappings: wrapping a dict in set() keeps only its keys,
        # which would leave the mapped values unchecked.
        self.assertEqual(dict(result), {'sum': [13, 17, 21, 26]})

        res = Event.reduce(result, Functions.avg())
        self.assertEqual(dict(res), {'sum': 19.25})
示例#22
0
    def test_collection_collapse(self):
        """Test Collection.collapse(): sum 'in' and 'out' into 'in_out_sum'."""
        summed = self._canned_collection.collapse(['in', 'out'], 'in_out_sum', Functions.sum())
        self.assertEqual(summed.size(), 3)

        for event in summed.events():
            field_names = list(event.data().keys())
            self.assertEqual(len(field_names), 3)

            expected_total = event.get('in') + event.get('out')
            self.assertEqual(expected_total, event.get('in_out_sum'))
示例#23
0
    def test_underlying_methods(self):
        """Exercise the TimeSeries methods that alias collection aggregations."""

        self.assertEqual(self._canned_event_series.count(), len(EVENT_LIST))

        canned = self._canned_event_series

        # (method name, column, expected result)
        expectations = (
            ('sum', 'in', 9),
            ('avg', 'out', 4),
            ('mean', 'out', 4),
            ('min', 'in', 1),
            ('max', 'in', 5),
            ('median', 'out', 4),
            ('stdev', 'out', 1.632993161855452),
        )
        for method_name, column, expected in expectations:
            self.assertEqual(getattr(canned, method_name)(column), expected)

        # redundant, but for coverage
        summed_by_name = canned.aggregate(Functions.sum(), 'in')
        self.assertEqual(summed_by_name, 9)
        summed_by_tuple = canned.aggregate(Functions.sum(), ('in', ))
        self.assertEqual(summed_by_tuple, 9)

        from_data = TimeSeries(DATA)
        self.assertEqual(from_data.aggregate(Functions.sum()), 189)
示例#24
0
    def test_simple_collapse(self):
        """Collapse the 'in' and 'out' columns into their sum."""
        series = TimeSeries(IN_OUT_DATA)

        kcol = (
            Pipeline()
            .from_source(series)
            .collapse(['in', 'out'], 'in_out_sum', Functions.sum())
            .emit_on('flush')
            .to_keyed_collections()
        )

        # expected sums for the first three events, in order
        for position, expected_sum in enumerate((117, 110, 108)):
            self.assertEqual(kcol.get('all').at(position).get('in_out_sum'), expected_sum)
示例#25
0
    def test_event_map_function_arg_and_reduce(self):  # pylint: disable=invalid-name
        """Test Event.map() with a custom function and Event.reduce().

        The mapping function emits ``{'sum': in + out}`` for every event; the
        assertions check both the key and the values that map() collects, and
        then the value reduce() computes with Functions.avg().
        """
        def map_sum(event):  # pylint: disable=missing-docstring
            return dict(sum=event.get('in') + event.get('out'))

        result = Event.map(self._get_event_series(), map_sum)
        # Assert on the full mapping. set() over a dict iterates its keys only,
        # so a set-based comparison would pass for any values under 'sum'.
        self.assertEqual(dict(result), {'sum': [13, 17, 21, 26]})

        res = Event.reduce(result, Functions.avg())
        self.assertEqual(dict(res), {'sum': 19.25})
示例#26
0
    def test_ts_collapse(self):
        """
        Test TimeSeries.collapse() by summing the 'in' and 'out' columns.
        """
        source_series = self._canned_event_series
        columns = ['in', 'out']

        collapsed = source_series.collapse(columns, 'in_out_sum', Functions.sum())

        for evt in collapsed.events():
            in_value = evt.get('in')
            out_value = evt.get('out')
            self.assertEqual(in_value + out_value, evt.get('in_out_sum'))
示例#27
0
    def test_collection_collapse(self):
        """Test Collection.collapse() using a sum over 'in' and 'out'."""
        collection = self._canned_collection
        reducer = Functions.sum()

        collapsed = collection.collapse(['in', 'out'], 'in_out_sum', reducer)
        self.assertEqual(collapsed.size(), 3)

        for evt in collapsed.events():
            # every event is expected to carry three data keys
            key_total = len(list(evt.data().keys()))
            self.assertEqual(key_total, 3)
            self.assertEqual(evt.get('in') + evt.get('out'), evt.get('in_out_sum'))
示例#28
0
    def test_multiple_collapse_chains(self):
        """Run two collapsers back to back: a sum and a max of 'in'/'out'."""
        series = TimeSeries(IN_OUT_DATA)

        sum_stage = Pipeline().from_source(series).collapse(
            ['in', 'out'], 'in_out_sum', Functions.sum())
        max_stage = sum_stage.collapse(['in', 'out'], 'in_out_max', Functions.max())
        kcol = max_stage.emit_on('flush').to_keyed_collections()

        sums = [kcol.get('all').at(pos).get('in_out_sum') for pos in range(3)]
        self.assertEqual(sums, [117, 110, 108])

        maxes = [kcol.get('all').at(pos).get('in_out_max') for pos in range(3)]
        self.assertEqual(maxes, [80, 88, 56])
示例#29
0
    def test_sum_and_find_max(self):
        """Sum 'in' and 'out' per event, then find the maximum of those totals."""
        def on_event(event):
            """Receive the aggregated event from EventOut and check it."""
            self.assertEqual(event.get('max_total'), 117)

        series = TimeSeries(IN_OUT_DATA)

        # output through the callback
        (
            Pipeline()
            .from_source(series)
            .emit_on('flush')
            .collapse(['in', 'out'], 'total', Functions.sum())
            .aggregate({'max_total': {'total': Functions.max()}})
            .to(EventOut, on_event)
        )

        # same pipeline, output as an event list
        event_list = (
            Pipeline()
            .from_source(series)
            .emit_on('flush')
            .collapse(['in', 'out'], 'total', Functions.sum())
            .aggregate({'max_total': {'total': Functions.max()}})
            .to_event_list()
        )

        self.assertEqual(len(event_list), 1)
        self.assertEqual(event_list[0].get('max_total'), 117)
示例#30
0
    def test_simple_collapse(self):
        """Collapse a subset of columns ('in', 'out') into 'in_out_sum'."""
        series = TimeSeries(IN_OUT_DATA)

        collapsed = Pipeline().from_source(series).collapse(
            ['in', 'out'], 'in_out_sum', Functions.sum())
        kcol = collapsed.emit_on('flush').to_keyed_collections()

        sums = [kcol.get('all').at(pos).get('in_out_sum') for pos in range(3)]
        self.assertEqual(sums, [117, 110, 108])
示例#31
0
    def test_aggregation_filtering(self):
        """Test the filtering methods applied through Collection.aggregate().

        The collection holds three events; 'in' is complete (1, 2, 3) while
        'out' is 1, 5 and None, so only the 'out' results depend on the filter.
        """

        elist = [
            Event(1429673400000, {'in': 1, 'out': 1}),
            Event(1429673460000, {'in': 2, 'out': 5}),
            Event(1429673520000, {'in': 3, 'out': None}),
        ]

        coll = Collection(elist)

        # no filter on a complete column
        self.assertEqual(coll.aggregate(Functions.sum(), 'in'), 6)

        # propagate_missing: a column containing None is expected to yield None
        self.assertEqual(coll.aggregate(Functions.sum(Filters.propagate_missing), 'in'), 6)
        # identity check for the None singleton, as the other None assertions
        # in this test suite do
        self.assertIsNone(coll.aggregate(Functions.sum(Filters.propagate_missing), 'out'))

        # ignore_missing: expected average over the two present values of 'out'
        self.assertEqual(coll.aggregate(Functions.avg(Filters.ignore_missing), 'in'), 2)
        self.assertEqual(coll.aggregate(Functions.avg(Filters.ignore_missing), 'out'), 3)

        # zero_missing: expected average with the None counted as 0
        self.assertEqual(coll.aggregate(Functions.avg(Filters.zero_missing), 'in'), 2)
        self.assertEqual(coll.aggregate(Functions.avg(Filters.zero_missing), 'out'), 2)
示例#32
0
    def test_aggregation_filtering(self):
        """Test the filtering modifiers to the TimeSeries aggregation functions.

        Three events are used; their 'out' values are 2, None and 6, and the
        expected results reflect how each filter handles the None.
        """

        event_objects = [
            Event(1429673400000, {
                'in': 1,
                'out': 2
            }),
            Event(1429673460000, {
                'in': 3,
                'out': None
            }),
            Event(1429673520000, {
                'in': 5,
                'out': 6
            }),
        ]

        series = TimeSeries(dict(name='events', events=event_objects))

        self.assertEqual(series.sum('out', Filters.ignore_missing), 8)
        self.assertEqual(series.avg('out', Filters.ignore_missing), 4)
        self.assertEqual(series.min('out', Filters.zero_missing), 0)
        # Use the identity check for a None result, consistent with the
        # assertIsNone() on the empty-list average further down.
        self.assertIsNone(series.max('out', Filters.propagate_missing))
        self.assertEqual(series.mean('out', Filters.ignore_missing), 4)
        self.assertEqual(series.median('out', Filters.zero_missing), 2)
        self.assertEqual(series.stdev('out', Filters.zero_missing),
                         2.494438257849294)

        # avg built with none_if_empty is expected to return None for []
        avg_f = Functions.avg(Filters.none_if_empty)
        self.assertIsNone(avg_f([]))

        def bad_filtering_function():  # pylint: disable=missing-docstring
            pass

        # a function that is not a known filter must raise FilterException
        with self.assertRaises(FilterException):
            series.sum('out', bad_filtering_function)
示例#33
0
    def test_aggregation_filtering(self):
        """Test the filtering methods used with Collection.aggregate().

        Column 'in' holds 1, 2, 3; column 'out' holds 1, 5 and None, which is
        the value each filter has to deal with.
        """

        elist = [
            Event(1429673400000, {
                'in': 1,
                'out': 1
            }),
            Event(1429673460000, {
                'in': 2,
                'out': 5
            }),
            Event(1429673520000, {
                'in': 3,
                'out': None
            }),
        ]

        coll = Collection(elist)

        # unfiltered sum of the complete column
        self.assertEqual(coll.aggregate(Functions.sum(), 'in'), 6)

        # propagate_missing: None is expected when the column has a None
        self.assertEqual(
            coll.aggregate(Functions.sum(Filters.propagate_missing), 'in'), 6)
        # identity check for the None singleton rather than an equality test
        self.assertIsNone(
            coll.aggregate(Functions.sum(Filters.propagate_missing), 'out'))

        # ignore_missing: expected average of the present values only
        self.assertEqual(
            coll.aggregate(Functions.avg(Filters.ignore_missing), 'in'), 2)
        self.assertEqual(
            coll.aggregate(Functions.avg(Filters.ignore_missing), 'out'), 3)

        # zero_missing: expected average with None counted as 0
        self.assertEqual(
            coll.aggregate(Functions.avg(Filters.zero_missing), 'in'), 2)
        self.assertEqual(
            coll.aggregate(Functions.avg(Filters.zero_missing), 'out'), 2)
示例#34
0
 def test_simple_map_reduce(self):
     """Test simple map/reduce: average the 'in' and 'out' columns.

     Event.map_reduce() is expected to return one averaged value per
     requested column.
     """
     result = Event.map_reduce(self._get_event_series(), ['in', 'out'],
                               Functions.avg())
     # Compare the whole mapping; set() over a dict would keep only the
     # column names and leave the averages unchecked.
     self.assertEqual(dict(result), {'in': 5.0, 'out': 14.25})
示例#35
0
    def test_bad_processor_args(self):
        """Hand invalid constructor arguments to the Processor classes."""

        # a plain dict is neither a Pipeline nor a processor to copy from
        processors = (Aggregator, Collapser, Converter, Filter, Mapper,
                      Offset, Selector, Taker)
        for processor in processors:
            with self.assertRaises(ProcessorException):
                processor(dict())

        pipeline = Pipeline()

        # Filter: no callable function supplied
        with self.assertRaises(ProcessorException):
            Filter(pipeline)

        # Aggregator: no options at all
        with self.assertRaises(ProcessorException):
            Aggregator(pipeline)

        # Aggregator: wrong fields type, bad fields key, bad fields value
        for bad_fields in (list(), {1: 'foo'}, {'in': 'foo'}):
            with self.assertRaises(ProcessorException):
                Aggregator(pipeline, Options(fields=bad_fields))

        # Aggregator: stream mode without a window strategy
        with self.assertRaises(ProcessorException):
            stream_state = pipeline._d.update(dict(mode='stream'))  # pylint: disable=protected-access
            stream_pipeline = Pipeline(stream_state)

            Aggregator(stream_pipeline, Options(fields={'in': Functions.avg()}))

        # Converter: no type in the options
        with self.assertRaises(ProcessorException):
            Converter(pipeline)

        # Converter: a type it cannot convert to
        with self.assertRaises(ProcessorException):
            Converter(pipeline, Options(type=Pipeline))

        # Mapper: a dict, then a Pipeline with no options
        for bad_arg in (dict(), pipeline):
            with self.assertRaises(ProcessorException):
                Mapper(bad_arg)
示例#36
0
 def test_simple_map_reduce(self):
     """Run Event.map_reduce with Functions.avg() over the 'in'/'out' fields."""
     series = self._get_event_series()
     outcome = Event.map_reduce(series, ['in', 'out'], Functions.avg())
     got = set(outcome)
     # NOTE(review): wrapping the expected dict in set() keeps only its keys,
     # so the 5.0 and 14.25 values are not part of the comparison - confirm
     # whether the averages were meant to be checked.
     want = set({'in': 5.0, 'out': 14.25})
     self.assertEqual(got, want)
示例#37
0
    def test_sum_events_with_combine(self):
        """Exercise Event.sum, Event.avg and Event.combine on a list of events."""

        # three events sharing self.aware_ts, each with fields a, b and c
        payloads = (
            {'a': 5, 'b': 6, 'c': 7},
            {'a': 2, 'b': 3, 'c': 4},
            {'a': 1, 'b': 2, 'c': 3},
        )
        events = [self._create_event(self.aware_ts, data) for data in payloads]

        # sum every field
        summed = Event.sum(events)
        for field, total in (('a', 8), ('b', 11), ('c', 14)):
            self.assertEqual(summed[0].get(field), total)

        # sum a single field; get() gives None for the others
        summed = Event.sum(events, 'a')
        self.assertEqual(summed[0].get('a'), 8)
        self.assertIsNone(summed[0].get('b'))
        self.assertIsNone(summed[0].get('c'))

        # sum a list of fields
        summed = Event.sum(events, ['a', 'c'])
        self.assertEqual(summed[0].get('a'), 8)
        self.assertIsNone(summed[0].get('b'))
        self.assertEqual(summed[0].get('c'), 14)

        # average of 'c' with a fourth event appended
        extra = self._create_event(self.aware_ts, {'a': 1, 'b': 1, 'c': 2})
        averaged = Event.avg(events + [extra], 'c')
        self.assertEqual(averaged[0].get('c'), 4)

        # an empty event list comes back as an empty list
        self.assertEqual(Event.sum([]), [])
        self.assertEqual(Event.avg([]), [])

        # the other reducers from the Functions module, applied to field 'c'
        reducer_cases = (
            (Functions.max, 7),
            (Functions.min, 3),
            (Functions.count, 3),
            (Functions.first, 7),
            (Functions.last, 3),
            (Functions.difference, 4),
        )
        for make_reducer, expected in reducer_cases:
            combined = Event.combine(events, 'c', make_reducer())
            self.assertEqual(combined[0].get('c'), expected)

        # first/last give None when handed an empty list
        self.assertIsNone(Functions.first()([]))
        self.assertIsNone(Functions.last()([]))
示例#38
0
    def test_bad_processor_args(self):
        """Feed the Processors bad args and expect ProcessorException each time."""
        expect_raises = self.assertRaises

        # neither Pipeline or copy ctor
        expect_raises(ProcessorException, Aggregator, {})
        expect_raises(ProcessorException, Collapser, {})
        expect_raises(ProcessorException, Converter, {})
        expect_raises(ProcessorException, Filter, {})
        expect_raises(ProcessorException, Mapper, {})
        expect_raises(ProcessorException, Offset, {})
        expect_raises(ProcessorException, Selector, {})
        expect_raises(ProcessorException, Taker, {})

        pip = Pipeline()

        # not passed a callable function
        expect_raises(ProcessorException, Filter, pip)

        # bad agg args
        # no opts
        expect_raises(ProcessorException, Aggregator, pip)

        # The lambdas keep the Options construction inside the checked call.
        # wrong opt type
        expect_raises(ProcessorException,
                      lambda: Aggregator(pip, Options(fields=[])))

        # bad opt keys
        expect_raises(ProcessorException,
                      lambda: Aggregator(pip, Options(fields={1: 'foo'})))

        # bad opt value
        expect_raises(ProcessorException,
                      lambda: Aggregator(pip, Options(fields={'in': 'foo'})))

        # stream w/no window strat
        def aggregate_on_stream():
            """Build a stream-mode Pipeline and hang an Aggregator on it."""
            # NOTE(review): the Pipeline construction is covered by the same
            # check, so a ProcessorException raised there would satisfy it.
            pip2 = Pipeline(pip._d.update({'mode': 'stream'}))  # pylint: disable=protected-access

            Aggregator(pip2, Options(fields={'in': Functions.avg()}))

        expect_raises(ProcessorException, aggregate_on_stream)

        # bad Converter args
        # no type in opts
        expect_raises(ProcessorException, Converter, pip)

        # bad opt type
        expect_raises(ProcessorException,
                      lambda: Converter(pip, Options(type=Pipeline)))

        # bad Mapper Args
        expect_raises(ProcessorException, Mapper, {})
        expect_raises(ProcessorException, Mapper, pip)
示例#39
0
    def test_collect_and_aggregate(self):
        """Group streamed events by 'type', window by 1h and average in/out."""
        # (hour, minute, type, in, out) for each event on 2015-03-14
        rows = (
            (7, 57, 'a', 3, 1),
            (7, 58, 'a', 9, 2),
            (7, 59, 'b', 6, 6),
            (8, 0, 'a', 4, 7),
            (8, 1, 'b', 5, 9),
        )
        events_in = [
            Event(
                aware_dt_from_args(
                    dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
                {'type': kind, 'in': in_val, 'out': out_val},
            )
            for hour, minute, kind, in_val, out_val in rows
        ]

        def cback(event):
            """Store the emitted event in RESULTS under '<index>:<type>'."""
            global RESULTS  # pylint: disable=global-statement
            if RESULTS is None:
                RESULTS = {}
            key = '{0}:{1}'.format(event.index(), event.get('type'))
            RESULTS[key] = event

        uin = Stream()

        # build the pipeline one step at a time
        pipe = Pipeline().from_source(uin)
        pipe = pipe.group_by('type')
        pipe = pipe.window_by(Capsule(duration='1h', type='fixed'))
        pipe = pipe.emit_on('eachEvent')
        pipe = pipe.aggregate({
            'type': {'type': Functions.keep()},
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
        pipe.to(EventOut, cback)

        for event in events_in:
            uin.add_event(event)

        # (RESULTS key, expected in_avg, expected out_avg)
        expectations = (
            ('1h-396199:a', 6, 1.5),
            ('1h-396199:b', 6, 6),
            ('1h-396200:a', 4, 7),
            ('1h-396200:b', 5, 9),
        )
        for key, in_avg, out_avg in expectations:
            self.assertEqual(RESULTS.get(key).get('in_avg'), in_avg)
            self.assertEqual(RESULTS.get(key).get('out_avg'), out_avg)
示例#40
0
    def test_aggregate_and_conversion(self):
        """Average in/out per 1h window, then convert to time range events."""

        # (hour, minute, in, out) for each event on 2015-03-14
        rows = (
            (1, 57, 3, 1),
            (1, 58, 9, 2),
            (1, 59, 6, 6),
            (2, 0, 4, 7),
            (2, 1, 5, 9),
        )
        events_in = [
            Event(
                aware_dt_from_args(
                    dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
                {'in': in_val, 'out': out_val},
            )
            for hour, minute, in_val, out_val in rows
        ]

        def cback(event):
            """Store the emitted event in RESULTS keyed by ms_from_dt of its timestamp."""
            global RESULTS  # pylint: disable=global-statement
            if RESULTS is None:
                RESULTS = {}
            key = '{0}'.format(ms_from_dt(event.timestamp()))
            RESULTS[key] = event

        uin = Stream()

        (
            Pipeline()
            .from_source(uin)
            .window_by('1h')
            .emit_on('eachEvent')
            .aggregate({
                'in_avg': {'in': Functions.avg()},
                'out_avg': {'out': Functions.avg()},
            })
            .as_time_range_events({'alignment': 'lag'})
            .to(EventOut, cback)
        )

        for event in events_in:
            uin.add_event(event)

        # (RESULTS key, expected in_avg, expected out_avg)
        expectations = (
            ('1426294800000', 6, 3),
            ('1426298400000', 4.5, 8),
        )
        for key, in_avg, out_avg in expectations:
            self.assertEqual(RESULTS.get(key).get('in_avg'), in_avg)
            self.assertEqual(RESULTS.get(key).get('out_avg'), out_avg)
示例#41
0
    def test_windowed_average(self):
        """Aggregate streamed events into per-window averages of in/out."""

        def make_event(hour, minute, data):
            """Build an Event on 2015-03-14 at hour:minute carrying data."""
            stamp = aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute))
            return Event(stamp, data)

        events_in = [
            make_event(7, 57, {'in': 3, 'out': 1}),
            make_event(7, 58, {'in': 9, 'out': 2}),
            make_event(7, 59, {'in': 6, 'out': 6}),
            make_event(8, 0, {'in': 4, 'out': 7}),
            make_event(8, 1, {'in': 5, 'out': 9}),
        ]

        def cback(event):
            """Store the emitted event in RESULTS under its index."""
            global RESULTS  # pylint: disable=global-statement
            if RESULTS is None:
                RESULTS = {}
            key = '{0}'.format(event.index())
            RESULTS[key] = event

        uin = Stream()

        (
            Pipeline()
            .from_source(uin)
            .window_by('1h')
            .emit_on('eachEvent')
            .aggregate({
                'in_avg': {'in': Functions.avg()},
                'out_avg': {'out': Functions.avg()},
            })
            .to(EventOut, cback)
        )

        for event in events_in:
            uin.add_event(event)

        # (RESULTS key, expected in_avg, expected out_avg)
        for key, in_avg, out_avg in (('1h-396199', 6, 3),
                                     ('1h-396200', 4.5, 8)):
            self.assertEqual(RESULTS.get(key).get('in_avg'), in_avg)
            self.assertEqual(RESULTS.get(key).get('out_avg'), out_avg)
示例#42
0
    def test_collect_and_aggregate(self):
        """Collect streamed events per 'type' and 1h window, then average them."""

        def make_event(hour, minute, data):
            """Build an Event on 2015-03-14 at hour:minute carrying data."""
            stamp = aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute))
            return Event(stamp, data)

        events_in = [
            make_event(7, 57, {'type': 'a', 'in': 3, 'out': 1}),
            make_event(7, 58, {'type': 'a', 'in': 9, 'out': 2}),
            make_event(7, 59, {'type': 'b', 'in': 6, 'out': 6}),
            make_event(8, 0, {'type': 'a', 'in': 4, 'out': 7}),
            make_event(8, 1, {'type': 'b', 'in': 5, 'out': 9}),
        ]

        def cback(event):
            """Store the emitted event in RESULTS under '<index>:<type>'."""
            global RESULTS  # pylint: disable=global-statement
            if RESULTS is None:
                RESULTS = {}
            slot = '{0}:{1}'.format(event.index(), event.get('type'))
            RESULTS[slot] = event

        uin = Stream()

        (
            Pipeline()
            .from_source(uin)
            .group_by('type')
            .window_by(Capsule(duration='1h', type='fixed'))
            .emit_on('eachEvent')
            .aggregate({
                'type': {'type': Functions.keep()},
                'in_avg': {'in': Functions.avg()},
                'out_avg': {'out': Functions.avg()},
            })
            .to(EventOut, cback)
        )

        for event in events_in:
            uin.add_event(event)

        # expected averages per RESULTS key, checked in the listed order
        expected = (
            ('1h-396199:a', {'in_avg': 6, 'out_avg': 1.5}),
            ('1h-396199:b', {'in_avg': 6, 'out_avg': 6}),
            ('1h-396200:a', {'in_avg': 4, 'out_avg': 7}),
            ('1h-396200:b', {'in_avg': 5, 'out_avg': 9}),
        )
        for key, averages in expected:
            for column in ('in_avg', 'out_avg'):
                self.assertEqual(RESULTS.get(key).get(column), averages[column])