def test_nested_merge(self):  # pylint: disable=invalid-name
    """Trigger merging of nested (deep) payload data."""
    stamp = aware_utcnow()
    first = Event(stamp, dict(payload=dict(a=1)))
    second = Event(stamp, dict(payload=dict(b=2)))
    # both nested payloads should survive the merge
    combined = Event.merge([first, second])
    self.assertEqual(combined[0].get('payload.a'), 1)
    self.assertEqual(combined[0].get('payload.b'), 2)
def test_event_merge(self):
    """Test Event.merge()/merge_events()"""
    # two events at the identical timestamp carrying disjoint
    # payload keys should merge into one event with all keys.
    payload_one = dict(foo='bar', baz='quux')
    payload_two = dict(foo2='bar', baz2='quux')
    first = Event(self.aware_ts, payload_one)
    second = Event(self.aware_ts, payload_two)
    merged = Event.merge([first, second])
    expected_keys = set(dict(payload_one, **payload_two))
    self.assertEqual(set(thaw(merged[0].data())), expected_keys)
def setUp(self):
    """Build one canned event of each flavor for the converter tests."""
    super(TestConverter, self).setUp()
    begin_ms = 1426316400000
    end_ms = 1426320000000
    self._event = Event(dt_from_ms(begin_ms), 3)
    self._tre = TimeRangeEvent(TimeRange([begin_ms, end_ms]), 3)
    self._idxe = IndexedEvent("1h-396199", 3)
def test_is_duplicate(self):  # pylint: disable=invalid-name
    """Test Event.is_duplicate()"""
    # plain events: same timestamp always counts as duplicate,
    # same value only matters when ignore_values=False.
    now = aware_utcnow()
    ev_a = Event(now, 23)
    ev_b = Event(now, 23)
    self.assertTrue(Event.is_duplicate(ev_a, ev_b))
    self.assertTrue(Event.is_duplicate(ev_a, ev_b, ignore_values=False))
    ev_c = Event(now, 25)
    self.assertTrue(Event.is_duplicate(ev_a, ev_c))
    self.assertFalse(Event.is_duplicate(ev_a, ev_c, ignore_values=False))
    # indexed events follow the same rules keyed on the index string
    idx_a = IndexedEvent('1d-12355', {'value': 42})
    idx_b = IndexedEvent('1d-12355', {'value': 42})
    self.assertTrue(Event.is_duplicate(idx_a, idx_b))
    self.assertTrue(Event.is_duplicate(idx_a, idx_b, ignore_values=False))
    idx_c = IndexedEvent('1d-12355', {'value': 44})
    self.assertTrue(Event.is_duplicate(idx_a, idx_c))
    self.assertFalse(Event.is_duplicate(idx_a, idx_c, ignore_values=False))
    # time range events keyed on the (begin, end) range
    end_ts = aware_utcnow()
    begin_ts = end_ts - datetime.timedelta(hours=12)
    end_ms = ms_from_dt(end_ts)
    begin_ms = ms_from_dt(begin_ts)
    rng_a = TimeRangeEvent((begin_ms, end_ms), 11)
    rng_b = TimeRangeEvent((begin_ms, end_ms), 11)
    self.assertTrue(Event.is_duplicate(rng_a, rng_b))
    self.assertTrue(Event.is_duplicate(rng_a, rng_b, ignore_values=False))
    rng_c = TimeRangeEvent((begin_ms, end_ms), 22)
    self.assertTrue(Event.is_duplicate(rng_a, rng_c))
    self.assertFalse(Event.is_duplicate(rng_a, rng_c, ignore_values=False))
def test_regular_with_event_copy(self):
    """create a regular event with copy constructor/existing event."""
    payload = {'a': 3, 'b': 6}
    original = self._create_event(self.aware_ts, payload)
    # passing an existing Event exercises the copy-constructor path
    duplicate = Event(original)
    self._base_checks(duplicate, payload, dtime=self.aware_ts)
def test_event_same(self):
    """test Event.same() static method."""
    ev1 = copy.copy(self.canned_event)
    ev2 = copy.copy(self.canned_event)
    self.assertTrue(Event.same(ev1, ev2))
    # identical data but a fresh timestamp -> not the same event
    rebuilt = Event(freeze(dict(time=self.aware_ts, data=ev1.data())))
    self.assertFalse(Event.same(ev1, rebuilt))
def test_aggregation_filtering(self):
    """test the filtering modifers to the agg functions."""
    # three events, the middle one missing its 'out' value
    event_objects = [
        Event(stamp, {'in': in_val, 'out': out_val})
        for stamp, in_val, out_val in (
            (1429673400000, 1, 2),
            (1429673460000, 3, None),
            (1429673520000, 5, 6),
        )
    ]
    series = TimeSeries(dict(name='events', events=event_objects))
    # each Filters variant handles the missing 'out' value differently
    self.assertEqual(series.sum('out', Filters.ignore_missing), 8)
    self.assertEqual(series.avg('out', Filters.ignore_missing), 4)
    self.assertEqual(series.min('out', Filters.zero_missing), 0)
    self.assertEqual(series.max('out', Filters.propagate_missing), None)
    self.assertEqual(series.mean('out', Filters.ignore_missing), 4)
    self.assertEqual(series.median('out', Filters.zero_missing), 2)
    self.assertEqual(series.stdev('out', Filters.zero_missing), 2.494438257849294)

    avg_f = Functions.avg(Filters.none_if_empty)
    self.assertIsNone(avg_f([]))

    def bad_filtering_function():  # pylint: disable=missing-docstring
        pass

    # a non-Filters callable must be rejected
    with self.assertRaises(FilterException):
        series.sum('out', bad_filtering_function)
def test_aggregation_filtering(self):
    """Test the new filtering methods for cleaning stuff."""
    # last event is missing its 'out' value on purpose
    elist = [
        Event(stamp, {'in': in_val, 'out': out_val})
        for stamp, in_val, out_val in (
            (1429673400000, 1, 1),
            (1429673460000, 2, 5),
            (1429673520000, 3, None),
        )
    ]
    coll = Collection(elist)
    self.assertEqual(coll.aggregate(Functions.sum(), 'in'), 6)
    # propagate_missing: clean field aggregates, dirty field -> None
    self.assertEqual(
        coll.aggregate(Functions.sum(Filters.propagate_missing), 'in'), 6)
    self.assertEqual(
        coll.aggregate(Functions.sum(Filters.propagate_missing), 'out'), None)
    # ignore_missing: average over only the present values
    self.assertEqual(
        coll.aggregate(Functions.avg(Filters.ignore_missing), 'in'), 2)
    self.assertEqual(
        coll.aggregate(Functions.avg(Filters.ignore_missing), 'out'), 3)
    # zero_missing: missing values count as zero
    self.assertEqual(
        coll.aggregate(Functions.avg(Filters.zero_missing), 'in'), 2)
    self.assertEqual(
        coll.aggregate(Functions.avg(Filters.zero_missing), 'out'), 2)
def test_event_valid(self):
    """test Event.is_valid_value()"""
    fields = dict(
        good='good',
        also_good=[],
        none=None,
        nan=float('NaN'),
        empty_string='',  # presume this is undefined
    )
    event = Event(self.aware_ts, fields)
    # real values (including empty list) are valid
    for key in ('good', 'also_good'):
        self.assertTrue(Event.is_valid_value(event, key))
    # None, NaN and empty string are treated as invalid
    for key in ('none', 'nan', 'empty_string'):
        self.assertFalse(Event.is_valid_value(event, key))
def test_mutators(self):
    """test collection mutation."""
    appended = self._canned_collection.add_event(
        Event(1429673580000, {'in': 7, 'out': 8}))
    self.assertEqual(appended.size(), 4)
    # exercise slice() here since this collection is longer
    window = appended.slice(1, 3)
    self.assertEqual(window.size(), 2)
    self.assertTrue(Event.same(window.at(0), EVENT_LIST[1]))
    # statistics on the four-element collection
    self.assertEqual(appended.stdev('in'), 2.23606797749979)
    self.assertEqual(appended.median('in'), 4)
def test_linear_stream(self):
    """Test streaming on linear fill"""

    def cback(collection, window_key, group_by):
        """the callback"""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    events = [
        Event(1400425947000, 1),
        Event(1400425948000, 2),
        Event(1400425949000, dict(value=None)),
        Event(1400425950000, dict(value=None)),
        Event(1400425951000, dict(value=None)),
        Event(1400425952000, 5),
        Event(1400425953000, 6),
        Event(1400425954000, 7),
    ]

    source = Stream()
    (
        Pipeline()
        .from_source(source)
        .fill(method='linear', field_spec='value')
        .to(CollectionOut, cback)
    )
    for ev in events:
        source.add_event(ev)

    self.assertEqual(RESULTS.size(), len(events))
    # indexes 2-4 are the linearly interpolated (filled) values
    expected = [1, 2, 2.75, 3.5, 4.25, 5, 6, 7]
    for idx, value in enumerate(expected):
        self.assertEqual(RESULTS.at(idx).get(), value)
DATA = dict(name="traffic", columns=["time", "value", "status"], points=[[1400425947000, 52, "ok"], [1400425948000, 18, "ok"], [1400425949000, 26, "fail"], [1400425950000, 93, "offline"]]) def _strp(dstr): """decode some existing test ts strings from js tests.""" fmt = '%Y-%m-%dT%H:%M:%SZ' return datetime.datetime.strptime(dstr, fmt).replace(tzinfo=pytz.UTC) EVENTLIST1 = [ Event(_strp("2015-04-22T03:30:00Z"), { 'in': 1, 'out': 2 }), Event(_strp("2015-04-22T03:31:00Z"), { 'in': 3, 'out': 4 }), Event(_strp("2015-04-22T03:32:00Z"), { 'in': 5, 'out': 6 }), ] SEPT_2014_DATA = dict(name="traffic", columns=["time", "value"], points=[[1409529600000, 80], [1409533200000, 88], [1409536800000, 52], [1409540400000, 80],
def in_only(event):
    """Return a new event keeping only the 'in' field of the original."""
    stripped = {'in': event.get('in')}
    return Event(event.timestamp(), stripped)
def in_only(event):
    """Reduce the event to its 'in' field - same as .select() basically."""
    payload = {'in': event.get('in')}
    return Event(event.timestamp(), payload)
def test_linear_stream_limit(self):
    """Test streaming on linear fill with limiter"""
    # Set up a state where good data stops arriving mid-way through a
    # linear fill.  The Taker then serves double duty: it caps the
    # number of results AND ensures any cached events get emitted.

    def cback(collection, window_key, group_by):
        """the callback"""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    events = [
        Event(1400425947000, 1),
        Event(1400425948000, 2),
        Event(1400425949000, dict(value=None)),
        Event(1400425950000, 3),
        Event(1400425951000, dict(value=None)),
        Event(1400425952000, dict(value=None)),
        Event(1400425953000, dict(value=None)),
        Event(1400425954000, dict(value=None)),
    ]

    # error state first - the last 4 events stay cached, not emitted
    source = Stream()
    (
        Pipeline()
        .from_source(source)
        .fill(method='linear', field_spec='value')
        .to(CollectionOut, cback)
    )
    for ev in events:
        source.add_event(ev)
    self.assertEqual(RESULTS.size(), 4)

    # shutting the stream down flushes events "stuck" in the cache
    source.stop()
    self.assertEqual(RESULTS.size(), 8)

    # Now use the Taker so cached events get emitted as well. With
    # fill_limit=3, the 7th event (after 3 have been cached) triggers
    # emission of the cached events, and the 8th is emitted directly
    # because the state resets to "have not seen a valid value yet",
    # meaning invalid events are emitted rather than cached.
    source = Stream()
    (
        Pipeline()
        .from_source(source)
        .fill(method='linear', fill_limit=3, field_spec='value')
        .to(CollectionOut, cback)
    )
    for ev in events:
        source.add_event(ev)
    self.assertEqual(RESULTS.size(), 8)
def test_event_series(self):
    """Round-trip a series of basic event objects through the interop script.

    Covers three shapes: a simple wire-format series, a series with
    extra metadata/mixed column types, and a series built directly
    from Event objects.
    """
    event_series = dict(name="traffic",
                        columns=["time", "value", "status"],
                        points=[[1400425947000, 52, "ok"],
                                [1400425948000, 18, "ok"],
                                [1400425949000, 26, "fail"],
                                [1400425950000, 93, "offline"]])
    series = TimeSeries(event_series)
    wire = self._call_interop_script('event', series.to_string())
    new_series = TimeSeries(wire)
    new_json = new_series.to_json()
    self._validate_wire_points(event_series, new_json)
    self.assertTrue(new_json.get('utc'))

    # try something a bit fancier with different types
    interface_series = dict(
        name="star-cr5:to_anl_ip-a_v4",
        description="star-cr5->anl(as683):100ge:site-ex:show:intercloud",
        device="star-cr5",
        id=169,
        interface="to_anl_ip-a_v4",
        is_ipv6=False,
        is_oscars=False,
        oscars_id=None,
        resource_uri="",
        site="anl",
        site_device="noni",
        site_interface="et-1/0/0",
        stats_type="Standard",
        title=None,
        columns=["time", "in", "out"],
        points=[[1400425947000, 52, 34],
                [1400425948000, 18, 13],
                [1400425949000, 26, 67],
                [1400425950000, 93, 91]])
    series = TimeSeries(interface_series)
    wire = self._call_interop_script('event', series.to_string())
    new_series = TimeSeries(wire)
    new_json = new_series.to_json()
    self._validate_wire_points(interface_series, new_json)

    # Now with a list of events
    event_objects = [
        Event(1429673400000, {'in': 1, 'out': 2}),
        Event(1429673460000, {'in': 3, 'out': 4}),
        Event(1429673520000, {'in': 5, 'out': 6}),
    ]
    series = TimeSeries(dict(name='events', events=event_objects))
    wire = self._call_interop_script('event', series.to_string())
    new_series = TimeSeries(wire)
    # idiomatic enumerate unpacking instead of indexing the (i, ev) tuple
    for idx, expected in enumerate(event_objects):
        self.assertTrue(Event.same(expected, new_series.at(idx)))
FilterException, PipelineIOException, TimeSeriesException, ) from pypond.functions import Functions, Filters from pypond.index import Index from pypond.indexed_event import IndexedEvent from pypond.range import TimeRange from pypond.series import TimeSeries from pypond.timerange_event import TimeRangeEvent from pypond.util import is_pvector, ms_from_dt, aware_utcnow, dt_from_ms # taken from the pipeline tests EVENT_LIST = [ Event(1429673400000, { 'in': 1, 'out': 2 }), Event(1429673460000, { 'in': 3, 'out': 4 }), Event(1429673520000, { 'in': 5, 'out': 6 }), ] # taken from the series tests DATA = dict(name="traffic", columns=["time", "value", "status"], points=[[1400425947000, 52, "ok"], [1400425948000, 18, "ok"],
def test_aggregate_and_conversion(self):
    """Aggregate/average and convert to TimeRangeEvent."""
    # (hour, minute), in, out - straddling the 01:00/02:00 boundary
    point_spec = [
        ((1, 57), 3, 1),
        ((1, 58), 9, 2),
        ((1, 59), 6, 6),
        ((2, 0), 4, 7),
        ((2, 1), 5, 9),
    ]
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            {'in': in_val, 'out': out_val})
        for (hour, minute), in_val, out_val in point_spec
    ]

    def cback(event):
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}'.format(ms_from_dt(event.timestamp()))] = event

    source = Stream()
    (
        Pipeline()
        .from_source(source)
        .window_by('1h')
        .emit_on('eachEvent')
        .aggregate({
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
        .as_time_range_events(dict(alignment='lag'))
        .to(EventOut, cback)
    )
    for ev in events_in:
        source.add_event(ev)

    # averages keyed by the ms timestamp of each hourly window
    self.assertEqual(RESULTS.get('1426294800000').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1426294800000').get('out_avg'), 3)
    self.assertEqual(RESULTS.get('1426298400000').get('in_avg'), 4.5)
    self.assertEqual(RESULTS.get('1426298400000').get('out_avg'), 8)
def test_collect_and_aggregate(self):
    """collect events together and aggregate."""
    # (hour, minute), type, in, out - two groups across two windows
    point_spec = [
        ((7, 57), 'a', 3, 1),
        ((7, 58), 'a', 9, 2),
        ((7, 59), 'b', 6, 6),
        ((8, 0), 'a', 4, 7),
        ((8, 1), 'b', 5, 9),
    ]
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            {'type': etype, 'in': in_val, 'out': out_val})
        for (hour, minute), etype, in_val, out_val in point_spec
    ]

    def cback(event):
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}:{1}'.format(event.index(), event.get('type'))] = event

    source = Stream()
    (
        Pipeline()
        .from_source(source)
        .group_by('type')
        .window_by(Capsule(duration='1h', type='fixed'))
        .emit_on('eachEvent')
        .aggregate({
            'type': {'type': Functions.keep()},
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
        .to(EventOut, cback)
    )
    for ev in events_in:
        source.add_event(ev)

    # per-group averages keyed by 'window:type'
    self.assertEqual(RESULTS.get('1h-396199:a').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396199:a').get('out_avg'), 1.5)
    self.assertEqual(RESULTS.get('1h-396199:b').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396199:b').get('out_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396200:a').get('in_avg'), 4)
    self.assertEqual(RESULTS.get('1h-396200:a').get('out_avg'), 7)
    self.assertEqual(RESULTS.get('1h-396200:b').get('in_avg'), 5)
    self.assertEqual(RESULTS.get('1h-396200:b').get('out_avg'), 9)
def test_windowed_average(self):
    """aggregate events into by windowed avg."""
    # (hour, minute), in, out - straddling the 07:00/08:00 boundary
    point_spec = [
        ((7, 57), 3, 1),
        ((7, 58), 9, 2),
        ((7, 59), 6, 6),
        ((8, 0), 4, 7),
        ((8, 1), 5, 9),
    ]
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            {'in': in_val, 'out': out_val})
        for (hour, minute), in_val, out_val in point_spec
    ]

    def cback(event):
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}'.format(event.index())] = event

    source = Stream()
    (
        Pipeline()
        .from_source(source)
        .window_by('1h')
        .emit_on('eachEvent')
        .aggregate({
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
        .to(EventOut, cback)
    )
    for ev in events_in:
        source.add_event(ev)

    # averages keyed by hourly index
    self.assertEqual(RESULTS.get('1h-396199').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396199').get('out_avg'), 3)
    self.assertEqual(RESULTS.get('1h-396200').get('in_avg'), 4.5)
    self.assertEqual(RESULTS.get('1h-396200').get('out_avg'), 8)
def test_bad_args(self):
    """Test with bad timestamp."""
    # a stringified timestamp is not an acceptable time argument
    with self.assertRaises(EventException):
        Event(str(self.msec), self.data)
def _create_event(self, arg1, arg2=None):  # pylint: disable=no-self-use
    """Build and return an Event from the given constructor arguments."""
    new_event = Event(arg1, arg2)
    return new_event