def test_aggregate_deep_path(self):
    """Check that aggregate() accepts dotted (deep) field paths."""
    def flushed_aggregate(field_map):
        """Aggregate DEEP_EVENT_LIST with *field_map*, emitting on flush."""
        series = TimeSeries(dict(name='events', events=DEEP_EVENT_LIST))
        pipeline = Pipeline().from_source(series).emit_on('flush')
        return pipeline.aggregate(field_map).to_event_list()

    # One output column fed from a single deep path.
    single = flushed_aggregate({'out_max': {'direction.out': Functions.max()}})
    self.assertEqual(single[0].get('out_max'), 4)

    # Make sure it also works when several output columns are
    # aggregated in the same call.
    both = flushed_aggregate({
        'in_max': {'direction.in': Functions.max()},
        'out_max': {'direction.out': Functions.max()},
    })
    self.assertEqual(both[0].get('out_max'), 4)
    self.assertEqual(both[0].get('in_max'), 8)
def test_sum_and_find_max(self):
    """Collapse in/out into a total column, then aggregate its max."""
    def check_max(event):
        """Assert on the event handed to the EventOut callback."""
        self.assertEqual(event.get('max_total'), 117)

    timeseries = TimeSeries(IN_OUT_DATA)

    def build_pipeline():
        """Assemble the collapse-then-aggregate chain shared by both runs."""
        return (
            Pipeline()
            .from_source(timeseries)
            .emit_on('flush')
            .collapse(['in', 'out'], 'total', Functions.sum())
            .aggregate({'max_total': {'total': Functions.max()}})
        )

    # First run: result is delivered through the callback.
    build_pipeline().to(EventOut, check_max)

    # Second run: same chain, result collected as an event list.
    events = build_pipeline().to_event_list()
    self.assertEqual(len(events), 1)
    self.assertEqual(events[0].get('max_total'), 117)
def test_fixed_window(self):
    """Exercise the '1d' fixed-window rollup and the hourly rollup."""
    daily = TimeSeries(SEPT_2014_DATA).fixed_window_rollup(
        '1d', {'value': {'value': Functions.avg()}})

    self.assertEqual(daily.size(), 5)
    for position, expected in ((0, 46.875), (2, 54.083333333333336), (4, 51.85)):
        self.assertEqual(daily.at(position).value(), expected)

    # Not really a rollup: each data point will create one
    # aggregation index, so the size matches the number of points.
    hourly = TimeSeries(SEPT_2014_DATA).hourly_rollup(
        {'value': {'value': Functions.avg()}})

    self.assertEqual(hourly.size(), len(SEPT_2014_DATA.get('points')))
    for position, expected in ((0, 80.0), (2, 52.0), (4, 26.0)):
        self.assertEqual(hourly.at(position).value(), expected)
def test_aggregate_deep_path(self):
    """Check that aggregate() accepts dotted (deep) field paths."""
    def flushed_aggregate(field_map):
        """Aggregate DEEP_EVENT_LIST with *field_map*, emitting on flush."""
        series = TimeSeries(dict(name='events', events=DEEP_EVENT_LIST))
        pipeline = Pipeline().from_source(series).emit_on('flush')
        return pipeline.aggregate(field_map).to_event_list()

    # One output column fed from a single deep path.
    single = flushed_aggregate({'out_max': {'direction.out': Functions.max()}})
    self.assertEqual(single[0].get('out_max'), 4)

    # Make sure it also works when several output columns are
    # aggregated in the same call.
    both = flushed_aggregate({
        'in_max': {'direction.in': Functions.max()},
        'out_max': {'direction.out': Functions.max()},
    })
    self.assertEqual(both[0].get('out_max'), 4)
    self.assertEqual(both[0].get('in_max'), 8)
def test_non_fixed_rollups(self):
    """Compare calendar rollup indexes against the Index string helpers."""
    series = TimeSeries(SEPT_2014_DATA)

    # Warnings raised in this block are captured and then discarded.
    with warnings.catch_warnings(record=True):
        daily = series.daily_rollup({'value': {'value': Functions.avg()}})

        # Timestamp of the first data point; every rollup below is
        # checked against the index string derived from it.
        first_ts = SEPT_2014_DATA.get('points')[0][0]

        expected_daily = Index.get_daily_index_string(
            dt_from_ms(first_ts), utc=False)
        self.assertEqual(expected_daily, daily.at(0).index().to_string())

        monthly = series.monthly_rollup({'value': {'value': Functions.avg()}})
        expected_monthly = Index.get_monthly_index_string(
            dt_from_ms(first_ts), utc=False)
        self.assertEqual(expected_monthly, monthly.at(0).index().to_string())

        yearly = series.yearly_rollup({'value': {'value': Functions.avg()}})
        expected_yearly = Index.get_yearly_index_string(
            dt_from_ms(first_ts), utc=False)
        self.assertEqual(expected_yearly, yearly.at(0).index().to_string())
def test_non_fixed_rollups(self):
    """Compare calendar rollup indexes against the Index string helpers."""
    series = TimeSeries(SEPT_2014_DATA)

    # Warnings raised in this block are captured and then discarded.
    with warnings.catch_warnings(record=True):
        daily = series.daily_rollup({'value': {'value': Functions.avg()}})

        # Timestamp of the first data point; every rollup below is
        # checked against the index string derived from it.
        first_ts = SEPT_2014_DATA.get('points')[0][0]

        expected_daily = Index.get_daily_index_string(
            dt_from_ms(first_ts), utc=False)
        self.assertEqual(expected_daily, daily.at(0).index().to_string())

        monthly = series.monthly_rollup({'value': {'value': Functions.avg()}})
        expected_monthly = Index.get_monthly_index_string(
            dt_from_ms(first_ts), utc=False)
        self.assertEqual(expected_monthly, monthly.at(0).index().to_string())

        yearly = series.yearly_rollup({'value': {'value': Functions.avg()}})
        expected_yearly = Index.get_yearly_index_string(
            dt_from_ms(first_ts), utc=False)
        self.assertEqual(expected_yearly, yearly.at(0).index().to_string())
def test_fixed_window(self):
    """Exercise the '1d' fixed-window rollup and the hourly rollup."""
    daily = TimeSeries(SEPT_2014_DATA).fixed_window_rollup(
        '1d', {'value': {'value': Functions.avg()}})

    self.assertEqual(daily.size(), 5)
    for position, expected in ((0, 46.875), (2, 54.083333333333336), (4, 51.85)):
        self.assertEqual(daily.at(position).value(), expected)

    # Not really a rollup: each data point will create one
    # aggregation index, so the size matches the number of points.
    hourly = TimeSeries(SEPT_2014_DATA).hourly_rollup(
        {'value': {'value': Functions.avg()}})

    self.assertEqual(hourly.size(), len(SEPT_2014_DATA.get('points')))
    for position, expected in ((0, 80.0), (2, 52.0), (4, 26.0)):
        self.assertEqual(hourly.at(position).value(), expected)
def test_aggregate_and_conversion(self):
    """Average per '1h' window and convert the output to time range events."""
    # (hour, minute, in, out) for each input event on 2015-03-14.
    samples = (
        (1, 57, 3, 1),
        (1, 58, 9, 2),
        (1, 59, 6, 6),
        (2, 0, 4, 7),
        (2, 1, 5, 9),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            {'in': val_in, 'out': val_out},
        )
        for hour, minute, val_in, val_out in samples
    ]

    def store_event(event):
        """Save the emitted event in RESULTS under its ms_from_dt timestamp."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}'.format(ms_from_dt(event.timestamp()))] = event

    stream = Stream()

    pipeline = (
        Pipeline()
        .from_source(stream)
        .window_by('1h')
        .emit_on('eachEvent')
        .aggregate({
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
        .as_time_range_events(dict(alignment='lag'))
    )
    pipeline.to(EventOut, store_event)

    for event in events_in:
        stream.add_event(event)

    # (RESULTS key, expected in_avg, expected out_avg) per window.
    expected = (
        ('1426294800000', 6, 3),
        ('1426298400000', 4.5, 8),
    )
    for key, in_avg, out_avg in expected:
        self.assertEqual(RESULTS.get(key).get('in_avg'), in_avg)
        self.assertEqual(RESULTS.get(key).get('out_avg'), out_avg)
def test_windowed_average(self):
    """Aggregate streamed events into per-'1h'-window averages."""
    # (hour, minute, in, out) for each input event on 2015-03-14.
    samples = (
        (7, 57, 3, 1),
        (7, 58, 9, 2),
        (7, 59, 6, 6),
        (8, 0, 4, 7),
        (8, 1, 5, 9),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            {'in': val_in, 'out': val_out},
        )
        for hour, minute, val_in, val_out in samples
    ]

    def store_event(event):
        """Save the emitted event in RESULTS under its index string."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}'.format(event.index())] = event

    stream = Stream()

    pipeline = (
        Pipeline()
        .from_source(stream)
        .window_by('1h')
        .emit_on('eachEvent')
        .aggregate({
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
    )
    pipeline.to(EventOut, store_event)

    for event in events_in:
        stream.add_event(event)

    # (RESULTS key, expected in_avg, expected out_avg) per window.
    expected = (
        ('1h-396199', 6, 3),
        ('1h-396200', 4.5, 8),
    )
    for key, in_avg, out_avg in expected:
        self.assertEqual(RESULTS.get(key).get('in_avg'), in_avg)
        self.assertEqual(RESULTS.get(key).get('out_avg'), out_avg)
def test_event_collapse(self):
    """Collapse columns 'a' and 'c' of an event, with and without append."""
    source = self._create_event(self.aware_ts, {'a': 5, 'b': 6, 'c': 7})

    # (append flag, number of data keys expected afterwards)
    for append, key_count in ((True, 4), (False, 1)):
        collapsed = source.collapse(
            ['a', 'c'], 'a_to_c', Functions.sum(), append=append)
        self.assertEqual(len(list(collapsed.data().keys())), key_count)
        self.assertEqual(collapsed.get('a_to_c'), 12)
def test_sum_events_with_combine(self):
    """Exercise Event.sum(), Event.avg() and Event.combine() on several events."""
    events = [
        self._create_event(self.aware_ts, row)
        for row in (
            {'a': 5, 'b': 6, 'c': 7},
            {'a': 2, 'b': 3, 'c': 4},
            {'a': 1, 'b': 2, 'c': 3},
        )
    ]

    # No field spec: all three columns are combined.
    summed = Event.sum(events)[0]
    self.assertEqual(summed.get('a'), 8)
    self.assertEqual(summed.get('b'), 11)
    self.assertEqual(summed.get('c'), 14)

    # A single field name.
    only_a = Event.sum(events, 'a')[0]
    self.assertEqual(only_a.get('a'), 8)
    self.assertIsNone(only_a.get('b'))
    self.assertIsNone(only_a.get('c'))

    # A list of field names.
    a_and_c = Event.sum(events, ['a', 'c'])[0]
    self.assertEqual(a_and_c.get('a'), 8)
    self.assertIsNone(a_and_c.get('b'))
    self.assertEqual(a_and_c.get('c'), 14)

    # Average of 'c' once a fourth event has been appended.
    extra = self._create_event(self.aware_ts, {'a': 1, 'b': 1, 'c': 2})
    averaged = Event.avg(events + [extra], 'c')
    self.assertEqual(averaged[0].get('c'), 4)

    # Empty input comes back as an empty list.
    self.assertEqual(Event.sum([]), [])
    self.assertEqual(Event.avg([]), [])

    # Work the extra reducer functions in the Functions module; the
    # factory is only called right before its combine() run.
    reducer_cases = (
        (Functions.max, 7),
        (Functions.min, 3),
        (Functions.count, 3),
        (Functions.first, 7),
        (Functions.last, 3),
        (Functions.difference, 4),
    )
    for factory, expected in reducer_cases:
        combined = Event.combine(events, 'c', factory())
        self.assertEqual(combined[0].get('c'), expected)

    self.assertIsNone(Functions.first()([]))
    self.assertIsNone(Functions.last()([]))
def test_bad_args(self):
    """Trigger the warnings and exceptions raised for bad Pipeline usage.

    Covers: the PipelineWarning recorded when windowing with utc=False,
    a non-source object passed to from_source(), output requested with
    no source, iterating an unbounded Stream, and an unknown emit_on()
    value.
    """
    uin = Stream()

    with warnings.catch_warnings(record=True) as wrn:
        # Always record PipelineWarning, even if an identical warning was
        # already issued earlier in the process. Without this the count
        # below depends on warning-registry state and thus on test order.
        warnings.simplefilter('always', PipelineWarning)
        Pipeline().from_source(uin).window_by('1h', utc=False)
        self.assertEqual(len(wrn), 1)
        self.assertTrue(issubclass(wrn[0].category, PipelineWarning))

    # bad arg
    with self.assertRaises(PipelineException):
        Pipeline().from_source(dict())

    # no source
    with self.assertRaises(PipelineException):
        Pipeline().to_keyed_collections()

    # can't iterate on unbounded source
    with self.assertRaises(PipelineIOException):
        list(uin.events())

    # bad emit on type
    with self.assertRaises(PipelineIOException):
        (
            Pipeline()
            .from_source(TimeSeries(dict(name='events', events=DEEP_EVENT_LIST)))
            .emit_on('BOGUS')
            .aggregate({'max_in': {'direction.in': Functions.max()}})
            .to_event_list()
        )
def test_multiple_collapse_chains(self):
    """Chain two collapse() calls and check both derived columns."""
    timeseries = TimeSeries(IN_OUT_DATA)

    pipeline = Pipeline().from_source(timeseries)
    pipeline = pipeline.collapse(['in', 'out'], 'in_out_sum', Functions.sum())
    pipeline = pipeline.collapse(['in', 'out'], 'in_out_max', Functions.max())
    kcol = pipeline.emit_on('flush').to_keyed_collections()

    # (column, expected values at positions 0, 1 and 2 of the 'all' collection)
    expectations = (
        ('in_out_sum', (117, 110, 108)),
        ('in_out_max', (80, 88, 56)),
    )
    for column, values in expectations:
        for position, expected in enumerate(values):
            self.assertEqual(kcol.get('all').at(position).get(column), expected)
def test_bad_args(self):
    """Trigger the warnings and exceptions raised for bad Pipeline usage.

    Covers: the PipelineWarning recorded when windowing with utc=False,
    a non-source object passed to from_source(), output requested with
    no source, iterating an unbounded Stream, and an unknown emit_on()
    value.
    """
    uin = Stream()

    with warnings.catch_warnings(record=True) as wrn:
        # Always record PipelineWarning, even if an identical warning was
        # already issued earlier in the process. Without this the count
        # below depends on warning-registry state and thus on test order.
        warnings.simplefilter('always', PipelineWarning)
        Pipeline().from_source(uin).window_by('1h', utc=False)
        self.assertEqual(len(wrn), 1)
        self.assertTrue(issubclass(wrn[0].category, PipelineWarning))

    # bad arg
    with self.assertRaises(PipelineException):
        Pipeline().from_source(dict())

    # no source
    with self.assertRaises(PipelineException):
        Pipeline().to_keyed_collections()

    # can't iterate on unbounded source
    with self.assertRaises(PipelineIOException):
        list(uin.events())

    # bad emit on type
    with self.assertRaises(PipelineIOException):
        (
            Pipeline()
            .from_source(TimeSeries(dict(name='events', events=DEEP_EVENT_LIST)))
            .emit_on('BOGUS')
            .aggregate({'max_in': {'direction.in': Functions.max()}})
            .to_event_list()
        )
def test_aggregation_filtering(self):
    """Apply the filtering modifiers to the TimeSeries aggregation helpers."""
    # (timestamp, in, out); the middle event has no 'out' value.
    rows = (
        (1429673400000, 1, 2),
        (1429673460000, 3, None),
        (1429673520000, 5, 6),
    )
    event_objects = [
        Event(stamp, {'in': val_in, 'out': val_out})
        for stamp, val_in, val_out in rows
    ]
    series = TimeSeries(dict(name='events', events=event_objects))

    # (aggregation method, filter, expected result for column 'out')
    cases = (
        (series.sum, Filters.ignore_missing, 8),
        (series.avg, Filters.ignore_missing, 4),
        (series.min, Filters.zero_missing, 0),
        (series.max, Filters.propagate_missing, None),
        (series.mean, Filters.ignore_missing, 4),
        (series.median, Filters.zero_missing, 2),
        (series.stdev, Filters.zero_missing, 2.494438257849294),
    )
    for method, missing_filter, expected in cases:
        self.assertEqual(method('out', missing_filter), expected)

    empty_safe_avg = Functions.avg(Filters.none_if_empty)
    self.assertIsNone(empty_safe_avg([]))

    def bad_filtering_function():  # pylint: disable=missing-docstring
        pass

    # A callable that is not one of the filters must be rejected.
    with self.assertRaises(FilterException):
        series.sum('out', bad_filtering_function)
def test_various_bad_args(self):
    """Confirm that aggregate() raises CollectionException on bad arguments."""
    series = TimeSeries(DATA)

    # A dict in place of the aggregation function.
    with self.assertRaises(CollectionException):
        series.aggregate({})

    # A dict in place of the field spec.
    with self.assertRaises(CollectionException):
        series.aggregate(Functions.sum(), {})
def test_ts_collapse(self):
    """Collapse in/out into in_out_sum on a TimeSeries and check every event."""
    collapsed = self._canned_event_series.collapse(
        ['in', 'out'], 'in_out_sum', Functions.sum())

    for event in collapsed.events():
        manual_total = event.get('in') + event.get('out')
        self.assertEqual(manual_total, event.get('in_out_sum'))
def test_underlying_methods(self):
    """Exercise the TimeSeries aliases for the underlying collection methods."""
    self.assertEqual(self._canned_event_series.count(), len(EVENT_LIST))

    tser = self._canned_event_series

    # (aggregation method, column, expected result)
    stats = (
        (tser.sum, 'in', 9),
        (tser.avg, 'out', 4),
        (tser.mean, 'out', 4),
        (tser.min, 'in', 1),
        (tser.max, 'in', 5),
        (tser.median, 'out', 4),
        (tser.stdev, 'out', 1.632993161855452),
    )
    for method, column, expected in stats:
        self.assertEqual(method(column), expected)

    # Redundant, but for coverage: field spec as a string and as a tuple.
    for field_spec in ('in', ('in',)):
        self.assertEqual(tser.aggregate(Functions.sum(), field_spec), 9)

    self.assertEqual(TimeSeries(DATA).aggregate(Functions.sum()), 189)
def test_event_map_function_arg_and_reduce(self):  # pylint: disable=invalid-name
    """Test Event.map() with a custom function and Event.reduce().

    The mapping function returns a one-key dict per event. The mapped
    result and its reduction are compared as full mappings, so both the
    key and the values are verified.
    """
    def map_sum(event):  # pylint: disable=missing-docstring
        return dict(sum=event.get('in') + event.get('out'))

    result = Event.map(self._get_event_series(), map_sum)
    # set() over a dict yields only its keys, so a set comparison would
    # never look at the mapped values; compare the mappings themselves.
    self.assertEqual(dict(result), {'sum': [13, 17, 21, 26]})

    res = Event.reduce(result, Functions.avg())
    self.assertEqual(dict(res), {'sum': 19.25})
def test_collection_collapse(self):
    """Collapse in/out into in_out_sum on a Collection and check every event."""
    collapsed = self._canned_collection.collapse(
        ['in', 'out'], 'in_out_sum', Functions.sum())

    self.assertEqual(collapsed.size(), 3)

    for event in collapsed.events():
        columns = list(event.data().keys())
        self.assertEqual(len(columns), 3)
        manual_total = event.get('in') + event.get('out')
        self.assertEqual(manual_total, event.get('in_out_sum'))
def test_underlying_methods(self):
    """Exercise the TimeSeries aliases for the underlying collection methods."""
    self.assertEqual(self._canned_event_series.count(), len(EVENT_LIST))

    tser = self._canned_event_series

    # (aggregation method, column, expected result)
    stats = (
        (tser.sum, 'in', 9),
        (tser.avg, 'out', 4),
        (tser.mean, 'out', 4),
        (tser.min, 'in', 1),
        (tser.max, 'in', 5),
        (tser.median, 'out', 4),
        (tser.stdev, 'out', 1.632993161855452),
    )
    for method, column, expected in stats:
        self.assertEqual(method(column), expected)

    # Redundant, but for coverage: field spec as a string and as a tuple.
    for field_spec in ('in', ('in',)):
        self.assertEqual(tser.aggregate(Functions.sum(), field_spec), 9)

    self.assertEqual(TimeSeries(DATA).aggregate(Functions.sum()), 189)
def test_simple_collapse(self):
    """Collapse the in/out columns into a single in_out_sum column."""
    timeseries = TimeSeries(IN_OUT_DATA)

    pipeline = Pipeline().from_source(timeseries)
    pipeline = pipeline.collapse(['in', 'out'], 'in_out_sum', Functions.sum())
    kcol = pipeline.emit_on('flush').to_keyed_collections()

    # Expected sums at positions 0, 1 and 2 of the 'all' collection.
    for position, expected in enumerate((117, 110, 108)):
        self.assertEqual(
            kcol.get('all').at(position).get('in_out_sum'), expected)
def test_multiple_collapse_chains(self):
    """Chain two collapse() calls and check both derived columns."""
    timeseries = TimeSeries(IN_OUT_DATA)

    pipeline = Pipeline().from_source(timeseries)
    pipeline = pipeline.collapse(['in', 'out'], 'in_out_sum', Functions.sum())
    pipeline = pipeline.collapse(['in', 'out'], 'in_out_max', Functions.max())
    kcol = pipeline.emit_on('flush').to_keyed_collections()

    # (column, expected values at positions 0, 1 and 2 of the 'all' collection)
    expectations = (
        ('in_out_sum', (117, 110, 108)),
        ('in_out_max', (80, 88, 56)),
    )
    for column, values in expectations:
        for position, expected in enumerate(values):
            self.assertEqual(kcol.get('all').at(position).get(column), expected)
def test_sum_and_find_max(self):
    """Collapse in/out into a total column, then aggregate its max."""
    def check_max(event):
        """Assert on the event handed to the EventOut callback."""
        self.assertEqual(event.get('max_total'), 117)

    timeseries = TimeSeries(IN_OUT_DATA)

    def build_pipeline():
        """Assemble the collapse-then-aggregate chain shared by both runs."""
        return (
            Pipeline()
            .from_source(timeseries)
            .emit_on('flush')
            .collapse(['in', 'out'], 'total', Functions.sum())
            .aggregate({'max_total': {'total': Functions.max()}})
        )

    # First run: result is delivered through the callback.
    build_pipeline().to(EventOut, check_max)

    # Second run: same chain, result collected as an event list.
    events = build_pipeline().to_event_list()
    self.assertEqual(len(events), 1)
    self.assertEqual(events[0].get('max_total'), 117)
def test_simple_collapse(self):
    """Collapse the in/out columns into a single in_out_sum column."""
    timeseries = TimeSeries(IN_OUT_DATA)

    pipeline = Pipeline().from_source(timeseries)
    pipeline = pipeline.collapse(['in', 'out'], 'in_out_sum', Functions.sum())
    kcol = pipeline.emit_on('flush').to_keyed_collections()

    # Expected sums at positions 0, 1 and 2 of the 'all' collection.
    for position, expected in enumerate((117, 110, 108)):
        self.assertEqual(
            kcol.get('all').at(position).get('in_out_sum'), expected)
def test_aggregation_filtering(self):
    """Run Collection.aggregate() with each of the missing-value filters."""
    # (timestamp, in, out); the last event has no 'out' value.
    rows = (
        (1429673400000, 1, 1),
        (1429673460000, 2, 5),
        (1429673520000, 3, None),
    )
    coll = Collection([
        Event(stamp, {'in': val_in, 'out': val_out})
        for stamp, val_in, val_out in rows
    ])

    # Baseline without any filter.
    self.assertEqual(coll.aggregate(Functions.sum(), 'in'), 6)

    # (reducer factory, filter, column, expected result)
    cases = (
        (Functions.sum, Filters.propagate_missing, 'in', 6),
        (Functions.sum, Filters.propagate_missing, 'out', None),
        (Functions.avg, Filters.ignore_missing, 'in', 2),
        (Functions.avg, Filters.ignore_missing, 'out', 3),
        (Functions.avg, Filters.zero_missing, 'in', 2),
        (Functions.avg, Filters.zero_missing, 'out', 2),
    )
    for factory, missing_filter, column, expected in cases:
        self.assertEqual(
            coll.aggregate(factory(missing_filter), column), expected)
def test_aggregation_filtering(self):
    """Apply the filtering modifiers to the TimeSeries aggregation helpers."""
    # (timestamp, in, out); the middle event has no 'out' value.
    rows = (
        (1429673400000, 1, 2),
        (1429673460000, 3, None),
        (1429673520000, 5, 6),
    )
    event_objects = [
        Event(stamp, {'in': val_in, 'out': val_out})
        for stamp, val_in, val_out in rows
    ]
    series = TimeSeries(dict(name='events', events=event_objects))

    # (aggregation method, filter, expected result for column 'out')
    cases = (
        (series.sum, Filters.ignore_missing, 8),
        (series.avg, Filters.ignore_missing, 4),
        (series.min, Filters.zero_missing, 0),
        (series.max, Filters.propagate_missing, None),
        (series.mean, Filters.ignore_missing, 4),
        (series.median, Filters.zero_missing, 2),
        (series.stdev, Filters.zero_missing, 2.494438257849294),
    )
    for method, missing_filter, expected in cases:
        self.assertEqual(method('out', missing_filter), expected)

    empty_safe_avg = Functions.avg(Filters.none_if_empty)
    self.assertIsNone(empty_safe_avg([]))

    def bad_filtering_function():  # pylint: disable=missing-docstring
        pass

    # A callable that is not one of the filters must be rejected.
    with self.assertRaises(FilterException):
        series.sum('out', bad_filtering_function)
def test_aggregation_filtering(self):
    """Run Collection.aggregate() with each of the missing-value filters."""
    # (timestamp, in, out); the last event has no 'out' value.
    rows = (
        (1429673400000, 1, 1),
        (1429673460000, 2, 5),
        (1429673520000, 3, None),
    )
    coll = Collection([
        Event(stamp, {'in': val_in, 'out': val_out})
        for stamp, val_in, val_out in rows
    ])

    # Baseline without any filter.
    self.assertEqual(coll.aggregate(Functions.sum(), 'in'), 6)

    # (reducer factory, filter, column, expected result)
    cases = (
        (Functions.sum, Filters.propagate_missing, 'in', 6),
        (Functions.sum, Filters.propagate_missing, 'out', None),
        (Functions.avg, Filters.ignore_missing, 'in', 2),
        (Functions.avg, Filters.ignore_missing, 'out', 3),
        (Functions.avg, Filters.zero_missing, 'in', 2),
        (Functions.avg, Filters.zero_missing, 'out', 2),
    )
    for factory, missing_filter, column, expected in cases:
        self.assertEqual(
            coll.aggregate(factory(missing_filter), column), expected)
def test_simple_map_reduce(self):
    """Test Event.map_reduce() averaging the 'in' and 'out' columns.

    The result is compared as a full mapping so that both the column
    names and the averaged values are verified.
    """
    result = Event.map_reduce(
        self._get_event_series(), ['in', 'out'], Functions.avg())
    # set() over a dict yields only its keys, so a set comparison would
    # never look at the averages; compare the mappings themselves.
    self.assertEqual(dict(result), {'in': 5.0, 'out': 14.25})
def test_bad_processor_args(self):
    """Feed the processors invalid arguments and expect ProcessorException."""
    # A plain dict is neither a Pipeline nor something the copy ctor accepts.
    processors = (
        Aggregator, Collapser, Converter, Filter,
        Mapper, Offset, Selector, Taker,
    )
    for processor in processors:
        with self.assertRaises(ProcessorException):
            processor(dict())

    pip = Pipeline()

    # Filter: not passed a callable function.
    with self.assertRaises(ProcessorException):
        Filter(pip)

    # Aggregator: no options at all.
    with self.assertRaises(ProcessorException):
        Aggregator(pip)

    # Aggregator: wrong option type, bad option key, bad option value.
    for bad_fields in (list(), {1: 'foo'}, {'in': 'foo'}):
        with self.assertRaises(ProcessorException):
            Aggregator(pip, Options(fields=bad_fields))

    # Aggregator: stream mode with no windowing strategy.
    with self.assertRaises(ProcessorException):
        stream_pip = Pipeline(pip._d.update(dict(mode='stream')))  # pylint: disable=protected-access
        Aggregator(stream_pip, Options(fields={'in': Functions.avg()}))

    # Converter: no type in the options.
    with self.assertRaises(ProcessorException):
        Converter(pip)

    # Converter: bad option type.
    with self.assertRaises(ProcessorException):
        Converter(pip, Options(type=Pipeline))

    # Mapper: bad arguments.
    for bad_arg in (dict(), pip):
        with self.assertRaises(ProcessorException):
            Mapper(bad_arg)
def test_sum_events_with_combine(self):
    """Exercise Event.sum(), Event.avg() and Event.combine() on several events."""
    events = [
        self._create_event(self.aware_ts, row)
        for row in (
            {'a': 5, 'b': 6, 'c': 7},
            {'a': 2, 'b': 3, 'c': 4},
            {'a': 1, 'b': 2, 'c': 3},
        )
    ]

    # No field spec: all three columns are combined.
    summed = Event.sum(events)[0]
    self.assertEqual(summed.get('a'), 8)
    self.assertEqual(summed.get('b'), 11)
    self.assertEqual(summed.get('c'), 14)

    # A single field name.
    only_a = Event.sum(events, 'a')[0]
    self.assertEqual(only_a.get('a'), 8)
    self.assertIsNone(only_a.get('b'))
    self.assertIsNone(only_a.get('c'))

    # A list of field names.
    a_and_c = Event.sum(events, ['a', 'c'])[0]
    self.assertEqual(a_and_c.get('a'), 8)
    self.assertIsNone(a_and_c.get('b'))
    self.assertEqual(a_and_c.get('c'), 14)

    # Average of 'c' once a fourth event has been appended.
    extra = self._create_event(self.aware_ts, {'a': 1, 'b': 1, 'c': 2})
    averaged = Event.avg(events + [extra], 'c')
    self.assertEqual(averaged[0].get('c'), 4)

    # Empty input comes back as an empty list.
    self.assertEqual(Event.sum([]), [])
    self.assertEqual(Event.avg([]), [])

    # Work the extra reducer functions in the Functions module; the
    # factory is only called right before its combine() run.
    reducer_cases = (
        (Functions.max, 7),
        (Functions.min, 3),
        (Functions.count, 3),
        (Functions.first, 7),
        (Functions.last, 3),
        (Functions.difference, 4),
    )
    for factory, expected in reducer_cases:
        combined = Event.combine(events, 'c', factory())
        self.assertEqual(combined[0].get('c'), expected)

    self.assertIsNone(Functions.first()([]))
    self.assertIsNone(Functions.last()([]))
def test_bad_processor_args(self):
    """Feed the processors invalid arguments and expect ProcessorException."""
    # A plain dict is neither a Pipeline nor something the copy ctor accepts.
    processors = (
        Aggregator, Collapser, Converter, Filter,
        Mapper, Offset, Selector, Taker,
    )
    for processor in processors:
        with self.assertRaises(ProcessorException):
            processor(dict())

    pip = Pipeline()

    # Filter: not passed a callable function.
    with self.assertRaises(ProcessorException):
        Filter(pip)

    # Aggregator: no options at all.
    with self.assertRaises(ProcessorException):
        Aggregator(pip)

    # Aggregator: wrong option type, bad option key, bad option value.
    for bad_fields in (list(), {1: 'foo'}, {'in': 'foo'}):
        with self.assertRaises(ProcessorException):
            Aggregator(pip, Options(fields=bad_fields))

    # Aggregator: stream mode with no windowing strategy.
    with self.assertRaises(ProcessorException):
        stream_pip = Pipeline(pip._d.update(dict(mode='stream')))  # pylint: disable=protected-access
        Aggregator(stream_pip, Options(fields={'in': Functions.avg()}))

    # Converter: no type in the options.
    with self.assertRaises(ProcessorException):
        Converter(pip)

    # Converter: bad option type.
    with self.assertRaises(ProcessorException):
        Converter(pip, Options(type=Pipeline))

    # Mapper: bad arguments.
    for bad_arg in (dict(), pip):
        with self.assertRaises(ProcessorException):
            Mapper(bad_arg)
def test_collect_and_aggregate(self):
    """Group streamed events by 'type', window them by '1h' and average."""
    # (hour, minute, type, in, out) for each input event on 2015-03-14.
    samples = (
        (7, 57, 'a', 3, 1),
        (7, 58, 'a', 9, 2),
        (7, 59, 'b', 6, 6),
        (8, 0, 'a', 4, 7),
        (8, 1, 'b', 5, 9),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            {'type': kind, 'in': val_in, 'out': val_out},
        )
        for hour, minute, kind, val_in, val_out in samples
    ]

    def store_event(event):
        """Save the emitted event in RESULTS under '<index>:<type>'."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}:{1}'.format(event.index(), event.get('type'))] = event

    stream = Stream()

    pipeline = (
        Pipeline()
        .from_source(stream)
        .group_by('type')
        .window_by(Capsule(duration='1h', type='fixed'))
        .emit_on('eachEvent')
        .aggregate({
            'type': {'type': Functions.keep()},
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
    )
    pipeline.to(EventOut, store_event)

    for event in events_in:
        stream.add_event(event)

    # (RESULTS key, expected in_avg, expected out_avg) per window and type.
    expected = (
        ('1h-396199:a', 6, 1.5),
        ('1h-396199:b', 6, 6),
        ('1h-396200:a', 4, 7),
        ('1h-396200:b', 5, 9),
    )
    for key, in_avg, out_avg in expected:
        self.assertEqual(RESULTS.get(key).get('in_avg'), in_avg)
        self.assertEqual(RESULTS.get(key).get('out_avg'), out_avg)
def test_aggregate_and_conversion(self):
    """Average per '1h' window and convert the output to time range events."""
    # (hour, minute, in, out) for each input event on 2015-03-14.
    samples = (
        (1, 57, 3, 1),
        (1, 58, 9, 2),
        (1, 59, 6, 6),
        (2, 0, 4, 7),
        (2, 1, 5, 9),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            {'in': val_in, 'out': val_out},
        )
        for hour, minute, val_in, val_out in samples
    ]

    def store_event(event):
        """Save the emitted event in RESULTS under its ms_from_dt timestamp."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}'.format(ms_from_dt(event.timestamp()))] = event

    stream = Stream()

    pipeline = (
        Pipeline()
        .from_source(stream)
        .window_by('1h')
        .emit_on('eachEvent')
        .aggregate({
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
        .as_time_range_events(dict(alignment='lag'))
    )
    pipeline.to(EventOut, store_event)

    for event in events_in:
        stream.add_event(event)

    # (RESULTS key, expected in_avg, expected out_avg) per window.
    expected = (
        ('1426294800000', 6, 3),
        ('1426298400000', 4.5, 8),
    )
    for key, in_avg, out_avg in expected:
        self.assertEqual(RESULTS.get(key).get('in_avg'), in_avg)
        self.assertEqual(RESULTS.get(key).get('out_avg'), out_avg)
def test_windowed_average(self):
    """Aggregate streamed events into per-'1h'-window averages."""
    # (hour, minute, in, out) for each input event on 2015-03-14.
    samples = (
        (7, 57, 3, 1),
        (7, 58, 9, 2),
        (7, 59, 6, 6),
        (8, 0, 4, 7),
        (8, 1, 5, 9),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            {'in': val_in, 'out': val_out},
        )
        for hour, minute, val_in, val_out in samples
    ]

    def store_event(event):
        """Save the emitted event in RESULTS under its index string."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}'.format(event.index())] = event

    stream = Stream()

    pipeline = (
        Pipeline()
        .from_source(stream)
        .window_by('1h')
        .emit_on('eachEvent')
        .aggregate({
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
    )
    pipeline.to(EventOut, store_event)

    for event in events_in:
        stream.add_event(event)

    # (RESULTS key, expected in_avg, expected out_avg) per window.
    expected = (
        ('1h-396199', 6, 3),
        ('1h-396200', 4.5, 8),
    )
    for key, in_avg, out_avg in expected:
        self.assertEqual(RESULTS.get(key).get('in_avg'), in_avg)
        self.assertEqual(RESULTS.get(key).get('out_avg'), out_avg)
def test_collect_and_aggregate(self):
    """Group streamed events by 'type', window them by '1h' and average."""
    # (hour, minute, type, in, out) for each input event on 2015-03-14.
    samples = (
        (7, 57, 'a', 3, 1),
        (7, 58, 'a', 9, 2),
        (7, 59, 'b', 6, 6),
        (8, 0, 'a', 4, 7),
        (8, 1, 'b', 5, 9),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            {'type': kind, 'in': val_in, 'out': val_out},
        )
        for hour, minute, kind, val_in, val_out in samples
    ]

    def store_event(event):
        """Save the emitted event in RESULTS under '<index>:<type>'."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}:{1}'.format(event.index(), event.get('type'))] = event

    stream = Stream()

    pipeline = (
        Pipeline()
        .from_source(stream)
        .group_by('type')
        .window_by(Capsule(duration='1h', type='fixed'))
        .emit_on('eachEvent')
        .aggregate({
            'type': {'type': Functions.keep()},
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
    )
    pipeline.to(EventOut, store_event)

    for event in events_in:
        stream.add_event(event)

    # (RESULTS key, expected in_avg, expected out_avg) per window and type.
    expected = (
        ('1h-396199:a', 6, 1.5),
        ('1h-396199:b', 6, 6),
        ('1h-396200:a', 4, 7),
        ('1h-396200:b', 5, 9),
    )
    for key, in_avg, out_avg in expected:
        self.assertEqual(RESULTS.get(key).get('in_avg'), in_avg)
        self.assertEqual(RESULTS.get(key).get('out_avg'), out_avg)