def test_bad_args(self):
    """Trigger exceptions and warnings, etc."""
    unbounded = Stream()

    # utc=False on window_by() should emit a single PipelineWarning.
    with warnings.catch_warnings(record=True) as caught:
        Pipeline().from_source(unbounded).window_by('1h', utc=False)
        self.assertEqual(len(caught), 1)
        self.assertTrue(issubclass(caught[0].category, PipelineWarning))

    # a plain dict is not a valid source
    with self.assertRaises(PipelineException):
        Pipeline().from_source(dict())

    # output requested with no source attached
    with self.assertRaises(PipelineException):
        Pipeline().to_keyed_collections()

    # an unbounded source can not be iterated
    with self.assertRaises(PipelineIOException):
        list(unbounded.events())

    # bogus emit_on trigger type
    with self.assertRaises(PipelineIOException):
        series = TimeSeries(dict(name='events', events=DEEP_EVENT_LIST))
        (
            Pipeline()
            .from_source(series)
            .emit_on('BOGUS')
            .aggregate({'max_in': {'direction.in': Functions.max()}})
            .to_event_list()
        )
def test_streaming_offset_chain(self):
    """stream events with an offset pipeline."""

    def capture(collection, window_key, group_by):  # pylint: disable=unused-argument
        """Stash the emitted collection in the module-level RESULTS."""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    stream = Stream()
    Pipeline().from_source(stream).offset_by(3, 'in').to(CollectionOut, capture)

    stream.add_event(EVENTLIST1[0])
    stream.add_event(EVENTLIST1[1])

    # Spurious lint error due to upstream tinkering with the global variable
    # pylint: disable=no-member
    self.assertEqual(RESULTS.size(), 2)
    self.assertEqual(RESULTS.at(0).get('in'), 4)
    self.assertEqual(RESULTS.at(1).get('in'), 6)
def test_bad_args(self):
    """Trigger exceptions and warnings, etc."""
    uin = Stream()

    # windowing with utc=False on a streaming source warns once
    with warnings.catch_warnings(record=True) as wrn:
        Pipeline().from_source(uin).window_by('1h', utc=False)
        self.assertEqual(len(wrn), 1)
        self.assertTrue(issubclass(wrn[0].category, PipelineWarning))

    # bad arg
    with self.assertRaises(PipelineException):
        Pipeline().from_source(dict())

    # no source
    with self.assertRaises(PipelineException):
        Pipeline().to_keyed_collections()

    # can't iterate on unbounded source
    with self.assertRaises(PipelineIOException):
        list(uin.events())

    # bad emit_on type - build the chain stepwise for clarity
    with self.assertRaises(PipelineIOException):
        pline = Pipeline().from_source(
            TimeSeries(dict(name='events', events=DEEP_EVENT_LIST)))
        pline = pline.emit_on('BOGUS')
        pline = pline.aggregate({'max_in': {'direction.in': Functions.max()}})
        pline.to_event_list()
def test_streaming_multiple_chains(self):
    """streaming events with two pipelines."""

    def sink_one(collection, window_key, group_by):  # pylint: disable=unused-argument
        """First chain's output callback."""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    def sink_two(collection, window_key, group_by):  # pylint: disable=unused-argument
        """Second chain's output callback."""
        global RESULTS2  # pylint: disable=global-statement
        RESULTS2 = collection

    stream = Stream()
    pip1 = (
        Pipeline()
        .from_source(stream)
        .offset_by(1, 'in')
        .offset_by(2, 'in')
        .to(CollectionOut, sink_one)
    )
    # second chain branches off the first pipeline
    pip1.offset_by(3, 'in').to(CollectionOut, sink_two)

    stream.add_event(EVENTLIST1[0])

    # Spurious lint error due to upstream tinkering with the globals
    # pylint: disable=no-member
    self.assertEqual(RESULTS.size(), 1)
    self.assertEqual(RESULTS2.size(), 1)
    self.assertEqual(RESULTS.at(0).get('in'), 4)
    self.assertEqual(RESULTS2.at(0).get('in'), 7)
def test_aggregate_and_conversion(self):
    """Aggregate/average and convert to TimeRangeEvent."""
    samples = [
        (dict(year=2015, month=3, day=14, hour=1, minute=57), {'in': 3, 'out': 1}),
        (dict(year=2015, month=3, day=14, hour=1, minute=58), {'in': 9, 'out': 2}),
        (dict(year=2015, month=3, day=14, hour=1, minute=59), {'in': 6, 'out': 6}),
        (dict(year=2015, month=3, day=14, hour=2, minute=0), {'in': 4, 'out': 7}),
        (dict(year=2015, month=3, day=14, hour=2, minute=1), {'in': 5, 'out': 9}),
    ]
    events_in = [Event(aware_dt_from_args(when), data) for when, data in samples]

    def capture(event):
        """Record each emitted event keyed by its ms timestamp."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}'.format(ms_from_dt(event.timestamp()))] = event

    uin = Stream()
    (
        Pipeline()
        .from_source(uin)
        .window_by('1h')
        .emit_on('eachEvent')
        .aggregate({
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
        .as_time_range_events(dict(alignment='lag'))
        .to(EventOut, capture)
    )

    for evt in events_in:
        uin.add_event(evt)

    self.assertEqual(RESULTS.get('1426294800000').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1426294800000').get('out_avg'), 3)
    self.assertEqual(RESULTS.get('1426298400000').get('in_avg'), 4.5)
    self.assertEqual(RESULTS.get('1426298400000').get('out_avg'), 8)
def test_windowed_average(self):
    """aggregate events into by windowed avg."""
    samples = [
        (dict(year=2015, month=3, day=14, hour=7, minute=57), {'in': 3, 'out': 1}),
        (dict(year=2015, month=3, day=14, hour=7, minute=58), {'in': 9, 'out': 2}),
        (dict(year=2015, month=3, day=14, hour=7, minute=59), {'in': 6, 'out': 6}),
        (dict(year=2015, month=3, day=14, hour=8, minute=0), {'in': 4, 'out': 7}),
        (dict(year=2015, month=3, day=14, hour=8, minute=1), {'in': 5, 'out': 9}),
    ]
    events_in = [Event(aware_dt_from_args(when), data) for when, data in samples]

    def capture(event):
        """Record each emitted event keyed by its index string."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}'.format(event.index())] = event

    uin = Stream()
    (
        Pipeline()
        .from_source(uin)
        .window_by('1h')
        .emit_on('eachEvent')
        .aggregate({
            'in_avg': {'in': Functions.avg()},
            'out_avg': {'out': Functions.avg()},
        })
        .to(EventOut, capture)
    )

    for evt in events_in:
        uin.add_event(evt)

    self.assertEqual(RESULTS.get('1h-396199').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396199').get('out_avg'), 3)
    self.assertEqual(RESULTS.get('1h-396200').get('in_avg'), 4.5)
    self.assertEqual(RESULTS.get('1h-396200').get('out_avg'), 8)
def test_event_to_event_noop(self):
    """Event to Event as a noop."""
    src = Stream()

    def check(event):  # pylint: disable=missing-docstring
        self.assertEqual(event, self._event)

    Pipeline().from_source(src).as_events().to(EventOut, check)
    src.add_event(self._event)
def test_idxe_to_idxe_noop(self):
    """IndexedEvent -> IndexedEvent noop."""
    src = Stream()

    def check(event):  # pylint: disable=missing-docstring
        self.assertEqual(event, self._idxe)

    Pipeline().from_source(src).as_indexed_events().to(EventOut, check)
    src.add_event(self._idxe)
def test_tre_to_tre_noop(self):
    """TimeRangeEvent -> TimeRangeEvent noop."""
    src = Stream()

    def check(event):  # pylint: disable=missing-docstring
        self.assertEqual(event, self._tre)

    Pipeline().from_source(src).as_time_range_events().to(EventOut, check)
    src.add_event(self._tre)
def test_idxe_to_tre(self):
    """IndexedEvent -> TimeRangeEvent conversion."""
    src = Stream()

    def check(event):  # pylint: disable=missing-docstring
        self.assertEqual(ms_from_dt(event.begin()), 1426316400000)
        self.assertEqual(ms_from_dt(event.end()), 1426320000000)
        self.assertEqual(event.get(), 3)

    Pipeline().from_source(src).as_time_range_events().to(EventOut, check)
    src.add_event(self._idxe)
def test_event_to_idxe_conversion(self):
    """Test converting Event object to IndexedEvent."""
    # pylint: disable=missing-docstring
    src = Stream()

    def check(event):
        self.assertEqual(event.index_as_string(), '1h-396199')
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(src)
        .as_indexed_events(dict(duration='1h'))
        .to(EventOut, check)
    )
    src.add_event(self._event)
def test_bad_conversion_error(self):
    """Test converting a non-Event."""
    # pylint: disable=missing-docstring
    src = Stream()

    def noop(event):  # pylint: disable=unused-argument
        pass

    (
        Pipeline()
        .from_source(src)
        .as_indexed_events(dict(duration='1h'))
        .to(EventOut, noop)
    )
    # a Pipeline is not an event type, so conversion must fail
    with self.assertRaises(ProcessorException):
        src.add_event(Pipeline())
def test_tre_to_idxe_error(self):
    """Test converting TimeRangeEvent object to IndexedEvent error."""
    # pylint: disable=missing-docstring
    src = Stream()

    def noop(event):  # pylint: disable=unused-argument
        pass

    (
        Pipeline()
        .from_source(src)
        .as_indexed_events(dict(duration='1h'))
        .to(EventOut, noop)
    )
    # TimeRangeEvent -> IndexedEvent is an unsupported conversion
    with self.assertRaises(ProcessorException):
        src.add_event(self._tre)
def test_idxe_to_idxe_noop(self):
    """IndexedEvent -> IndexedEvent noop."""
    stream1 = Stream()

    def verify(event):  # pylint: disable=missing-docstring
        self.assertEqual(event, self._idxe)

    pline = Pipeline().from_source(stream1)
    pline.as_indexed_events().to(EventOut, verify)
    stream1.add_event(self._idxe)
def test_tre_to_tre_noop(self):
    """TimeRangeEvent -> TimeRangeEvent noop."""
    stream1 = Stream()

    def verify(event):  # pylint: disable=missing-docstring
        self.assertEqual(event, self._tre)

    pline = Pipeline().from_source(stream1)
    pline.as_time_range_events().to(EventOut, verify)
    stream1.add_event(self._tre)
def test_event_to_event_noop(self):
    """Event to Event as a noop."""
    stream1 = Stream()

    def verify(event):  # pylint: disable=missing-docstring
        self.assertEqual(event, self._event)

    pline = Pipeline().from_source(stream1)
    pline.as_events().to(EventOut, verify)
    stream1.add_event(self._event)
def test_event_to_tre_conversion(self):
    """test converting Event objects to TimeRangeEvent."""
    # pylint: disable=missing-docstring
    # (alignment, expected begin ms, expected end ms)
    cases = [
        ('front', 1426316400000, 1426320000000),
        ('center', 1426314600000, 1426318200000),
        ('behind', 1426312800000, 1426316400000),
    ]
    for alignment, begin_ms, end_ms in cases:
        stream = Stream()

        # default args bind the loop values to avoid late-binding closures
        def check(event, begin_ms=begin_ms, end_ms=end_ms):
            self.assertEqual(ms_from_dt(event.begin()), begin_ms)
            self.assertEqual(ms_from_dt(event.end()), end_ms)
            self.assertEqual(event.get(), 3)

        (
            Pipeline()
            .from_source(stream)
            .as_time_range_events(dict(alignment=alignment, duration='1h'))
            .to(EventOut, check)
        )
        stream.add_event(self._event)
def test_idxe_to_tre(self):
    """IndexedEvent -> TimeRangeEvent conversion."""
    stream1 = Stream()

    def verify(event):  # pylint: disable=missing-docstring
        self.assertEqual(ms_from_dt(event.begin()), 1426316400000)
        self.assertEqual(ms_from_dt(event.end()), 1426320000000)
        self.assertEqual(event.get(), 3)

    Pipeline().from_source(stream1).as_time_range_events().to(EventOut, verify)
    stream1.add_event(self._idxe)
def test_idxe_to_event(self):
    """IndexedEvent -> Event conversion."""
    # pylint: disable=missing-docstring
    # (alignment, expected timestamp ms)
    cases = [
        ('center', 1426318200000),
        ('lag', 1426316400000),
        ('lead', 1426320000000),
    ]
    for alignment, ts_ms in cases:
        stream = Stream()

        # default arg binds the loop value to avoid late-binding closures
        def check(event, expected=ts_ms):
            self.assertEqual(ms_from_dt(event.timestamp()), expected)
            self.assertEqual(event.get(), 3)

        (
            Pipeline()
            .from_source(stream)
            .as_events(dict(alignment=alignment))
            .to(EventOut, check)
        )
        stream.add_event(self._idxe)
def test_event_to_idxe_conversion(self):
    """Test converting Event object to IndexedEvent."""
    # pylint: disable=missing-docstring
    stream1 = Stream()

    def verify(event):
        self.assertEqual(event.index_as_string(), '1h-396199')
        self.assertEqual(event.get(), 3)

    pline = Pipeline().from_source(stream1)
    pline.as_indexed_events(dict(duration='1h')).to(EventOut, verify)
    stream1.add_event(self._event)
def test_bad_conversion_error(self):
    """Test converting a non-Event."""
    # pylint: disable=missing-docstring
    stream1 = Stream()

    def ignore(event):  # pylint: disable=unused-argument
        pass

    pline = Pipeline().from_source(stream1)
    pline.as_indexed_events(dict(duration='1h')).to(EventOut, ignore)

    # feeding a non-event object must raise
    with self.assertRaises(ProcessorException):
        stream1.add_event(Pipeline())
def test_tre_to_idxe_error(self):
    """Test converting TimeRangeEvent object to IndexedEvent error."""
    # pylint: disable=missing-docstring
    stream1 = Stream()

    def ignore(event):  # pylint: disable=unused-argument
        pass

    pline = Pipeline().from_source(stream1)
    pline.as_indexed_events(dict(duration='1h')).to(EventOut, ignore)

    # TimeRangeEvent can not be converted to IndexedEvent
    with self.assertRaises(ProcessorException):
        stream1.add_event(self._tre)
def test_linear_stream(self):
    """Test streaming on linear fill"""

    def capture(collection, window_key, group_by):  # pylint: disable=unused-argument
        """the callback"""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    events = [
        Event(1400425947000, 1),
        Event(1400425948000, 2),
        Event(1400425949000, dict(value=None)),
        Event(1400425950000, dict(value=None)),
        Event(1400425951000, dict(value=None)),
        Event(1400425952000, 5),
        Event(1400425953000, 6),
        Event(1400425954000, 7),
    ]

    stream = Stream()
    Pipeline().from_source(stream).fill(
        method='linear', field_spec='value').to(CollectionOut, capture)

    for evt in events:
        stream.add_event(evt)

    self.assertEqual(RESULTS.size(), len(events))
    # middle three values are linearly interpolated between 2 and 5
    expected = [1, 2, 2.75, 3.5, 4.25, 5, 6, 7]
    for pos, want in enumerate(expected):
        self.assertEqual(RESULTS.at(pos).get(), want)
def test_event_conversion_bad_args(self):
    """test bad args for Event conversion."""

    def ignore(event):  # pylint: disable=missing-docstring, unused-argument
        pass

    # missing duration
    stream1 = Stream()
    pline1 = Pipeline().from_source(stream1)
    pline1.as_time_range_events(dict(alignment='front')).to(EventOut, ignore)
    with self.assertRaises(ProcessorException):
        stream1.add_event(self._event)

    # unknown alignment
    stream2 = Stream()
    pline2 = Pipeline().from_source(stream2)
    pline2.as_time_range_events(
        dict(alignment='bogus', duration='1h')).to(EventOut, ignore)
    with self.assertRaises(ProcessorException):
        stream2.add_event(self._event)
def test_linear_stream(self):
    """Test streaming on linear fill"""

    def on_collection(collection, window_key, group_by):  # pylint: disable=unused-argument
        """the callback"""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    events = [
        Event(1400425947000, 1),
        Event(1400425948000, 2),
        Event(1400425949000, dict(value=None)),
        Event(1400425950000, dict(value=None)),
        Event(1400425951000, dict(value=None)),
        Event(1400425952000, 5),
        Event(1400425953000, 6),
        Event(1400425954000, 7),
    ]

    stream = Stream()
    (
        Pipeline()
        .from_source(stream)
        .fill(method='linear', field_spec='value')
        .to(CollectionOut, on_collection)
    )

    for evt in events:
        stream.add_event(evt)

    self.assertEqual(RESULTS.size(), len(events))
    self.assertEqual(RESULTS.at(0).get(), 1)
    self.assertEqual(RESULTS.at(1).get(), 2)
    self.assertEqual(RESULTS.at(2).get(), 2.75)  # filled
    self.assertEqual(RESULTS.at(3).get(), 3.5)  # filled
    self.assertEqual(RESULTS.at(4).get(), 4.25)  # filled
    self.assertEqual(RESULTS.at(5).get(), 5)
    self.assertEqual(RESULTS.at(6).get(), 6)
    self.assertEqual(RESULTS.at(7).get(), 7)
def test_event_conversion_bad_args(self):
    """test bad args for Event conversion."""

    def noop(event):  # pylint: disable=missing-docstring, unused-argument
        pass

    # alignment without a duration should fail on conversion
    stream1 = Stream()
    (
        Pipeline()
        .from_source(stream1)
        .as_time_range_events(dict(alignment='front'))
        .to(EventOut, noop)
    )
    with self.assertRaises(ProcessorException):
        stream1.add_event(self._event)

    # unrecognized alignment should also fail
    stream2 = Stream()
    (
        Pipeline()
        .from_source(stream2)
        .as_time_range_events(dict(alignment='bogus', duration='1h'))
        .to(EventOut, noop)
    )
    with self.assertRaises(ProcessorException):
        stream2.add_event(self._event)
def test_idxe_to_event(self):
    """IndexedEvent -> Event conversion."""
    # pylint: disable=missing-docstring
    stream1 = Stream()

    def verify_center(event):
        self.assertEqual(ms_from_dt(event.timestamp()), 1426318200000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream1)
        .as_events(dict(alignment='center'))
        .to(EventOut, verify_center)
    )
    stream1.add_event(self._idxe)

    stream2 = Stream()

    def verify_lag(event):
        self.assertEqual(ms_from_dt(event.timestamp()), 1426316400000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream2)
        .as_events(dict(alignment='lag'))
        .to(EventOut, verify_lag)
    )
    stream2.add_event(self._idxe)

    stream3 = Stream()

    def verify_lead(event):
        self.assertEqual(ms_from_dt(event.timestamp()), 1426320000000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream3)
        .as_events(dict(alignment='lead'))
        .to(EventOut, verify_lead)
    )
    stream3.add_event(self._idxe)
def test_linear_stream_limit(self):
    """Test streaming on linear fill with limiter"""
    # Sets up a state where we stop seeing good data on a linear
    # fill. The Taker both limits the number of results and makes
    # sure any cached events get emitted.

    def capture(collection, window_key, group_by):  # pylint: disable=unused-argument
        """the callback"""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    events = [
        Event(1400425947000, 1),
        Event(1400425948000, 2),
        Event(1400425949000, dict(value=None)),
        Event(1400425950000, 3),
        Event(1400425951000, dict(value=None)),
        Event(1400425952000, dict(value=None)),
        Event(1400425953000, dict(value=None)),
        Event(1400425954000, dict(value=None)),
    ]

    # error state first - the last 4 events won't be emitted.
    stream = Stream()
    Pipeline().from_source(stream).fill(
        method='linear', field_spec='value').to(CollectionOut, capture)

    for evt in events:
        stream.add_event(evt)
    self.assertEqual(RESULTS.size(), 4)

    # shut it down; events "stuck" in the cache are flushed out.
    stream.stop()
    self.assertEqual(RESULTS.size(), 8)

    # With fill_limit=3 the 7th event (after 3 have been cached)
    # causes those to be emitted, and the 8th is emitted because the
    # state resets to "have not seen a valid value yet", so invalid
    # events pass straight through instead of being cached.
    stream = Stream()
    Pipeline().from_source(stream).fill(
        method='linear', fill_limit=3,
        field_spec='value').to(CollectionOut, capture)

    for evt in events:
        stream.add_event(evt)
    self.assertEqual(RESULTS.size(), 8)
def test_streaming_start_stop(self):
    """turn the stream off and on."""

    def capture(collection, window_key, group_by):  # pylint: disable=unused-argument
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    stream = Stream()
    Pipeline().from_source(stream).offset_by(3, 'in').to(CollectionOut, capture)

    stream.add_event(EVENTLIST1[0])
    stream.add_event(EVENTLIST1[1])

    stream.stop()
    stream.add_event(EVENTLIST1[2])

    # Spurious lint error due to upstream tinkering with the global
    # pylint: disable=no-member
    # source stopped, event shouldn't be added
    self.assertEqual(RESULTS.size(), 2)

    stream.start()
    stream.add_event(EVENTLIST1[2])
    self.assertEqual(RESULTS.size(), 3)
def test_windowed_average(self):
    """aggregate events into by windowed avg."""
    events_in = [
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=7, minute=57)),
            {'in': 3, 'out': 1}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=7, minute=58)),
            {'in': 9, 'out': 2}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=7, minute=59)),
            {'in': 6, 'out': 6}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=8, minute=0)),
            {'in': 4, 'out': 7}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=8, minute=1)),
            {'in': 5, 'out': 9}),
    ]

    def record(event):
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}'.format(event.index())] = event

    source_stream = Stream()
    aggregation = {
        'in_avg': {'in': Functions.avg()},
        'out_avg': {'out': Functions.avg()},
    }
    (
        Pipeline()
        .from_source(source_stream)
        .window_by('1h')
        .emit_on('eachEvent')
        .aggregate(aggregation)
        .to(EventOut, record)
    )

    for evt in events_in:
        source_stream.add_event(evt)

    self.assertEqual(RESULTS.get('1h-396199').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396199').get('out_avg'), 3)
    self.assertEqual(RESULTS.get('1h-396200').get('in_avg'), 4.5)
    self.assertEqual(RESULTS.get('1h-396200').get('out_avg'), 8)
def test_collect_and_aggregate(self):
    """collect events together and aggregate."""
    samples = [
        (dict(year=2015, month=3, day=14, hour=7, minute=57),
         {'type': 'a', 'in': 3, 'out': 1}),
        (dict(year=2015, month=3, day=14, hour=7, minute=58),
         {'type': 'a', 'in': 9, 'out': 2}),
        (dict(year=2015, month=3, day=14, hour=7, minute=59),
         {'type': 'b', 'in': 6, 'out': 6}),
        (dict(year=2015, month=3, day=14, hour=8, minute=0),
         {'type': 'a', 'in': 4, 'out': 7}),
        (dict(year=2015, month=3, day=14, hour=8, minute=1),
         {'type': 'b', 'in': 5, 'out': 9}),
    ]
    events_in = [Event(aware_dt_from_args(when), data) for when, data in samples]

    def capture(event):
        """Record emitted events keyed by "<index>:<type>"."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}:{1}'.format(event.index(), event.get('type'))] = event

    uin = Stream()
    fields = {
        'type': {'type': Functions.keep()},
        'in_avg': {'in': Functions.avg()},
        'out_avg': {'out': Functions.avg()},
    }
    (
        Pipeline()
        .from_source(uin)
        .group_by('type')
        .window_by(Capsule(duration='1h', type='fixed'))
        .emit_on('eachEvent')
        .aggregate(fields)
        .to(EventOut, capture)
    )

    for evt in events_in:
        uin.add_event(evt)

    self.assertEqual(RESULTS.get('1h-396199:a').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396199:a').get('out_avg'), 1.5)
    self.assertEqual(RESULTS.get('1h-396199:b').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396199:b').get('out_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396200:a').get('in_avg'), 4)
    self.assertEqual(RESULTS.get('1h-396200:a').get('out_avg'), 7)
    self.assertEqual(RESULTS.get('1h-396200:b').get('in_avg'), 5)
    self.assertEqual(RESULTS.get('1h-396200:b').get('out_avg'), 9)
def test_streaming_start_stop(self):
    """turn the stream off and on."""

    def sink(collection, window_key, group_by):  # pylint: disable=unused-argument
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    source = Stream()
    pline = Pipeline().from_source(source)
    pline.offset_by(3, 'in').to(CollectionOut, sink)

    source.add_event(EVENTLIST1[0])
    source.add_event(EVENTLIST1[1])

    # while stopped, added events are dropped
    source.stop()
    source.add_event(EVENTLIST1[2])

    # Spurious lint error due to upstream tinkering with the global
    # pylint: disable=no-member
    self.assertEqual(RESULTS.size(), 2)

    # after restart, events flow again
    source.start()
    source.add_event(EVENTLIST1[2])
    self.assertEqual(RESULTS.size(), 3)
def test_collect_and_aggregate(self):
    """collect events together and aggregate."""
    events_in = [
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=7, minute=57)),
            {'type': 'a', 'in': 3, 'out': 1}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=7, minute=58)),
            {'type': 'a', 'in': 9, 'out': 2}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=7, minute=59)),
            {'type': 'b', 'in': 6, 'out': 6}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=8, minute=0)),
            {'type': 'a', 'in': 4, 'out': 7}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=8, minute=1)),
            {'type': 'b', 'in': 5, 'out': 9}),
    ]

    def record(event):
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}:{1}'.format(event.index(), event.get('type'))] = event

    source_stream = Stream()
    pline = Pipeline().from_source(source_stream).group_by('type')
    pline = pline.window_by(Capsule(duration='1h', type='fixed'))
    pline = pline.emit_on('eachEvent')
    pline.aggregate({
        'type': {'type': Functions.keep()},
        'in_avg': {'in': Functions.avg()},
        'out_avg': {'out': Functions.avg()},
    }).to(EventOut, record)

    for evt in events_in:
        source_stream.add_event(evt)

    self.assertEqual(RESULTS.get('1h-396199:a').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396199:a').get('out_avg'), 1.5)
    self.assertEqual(RESULTS.get('1h-396199:b').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396199:b').get('out_avg'), 6)
    self.assertEqual(RESULTS.get('1h-396200:a').get('in_avg'), 4)
    self.assertEqual(RESULTS.get('1h-396200:a').get('out_avg'), 7)
    self.assertEqual(RESULTS.get('1h-396200:b').get('in_avg'), 5)
    self.assertEqual(RESULTS.get('1h-396200:b').get('out_avg'), 9)
def test_aggregate_and_conversion(self):
    """Aggregate/average and convert to TimeRangeEvent."""
    events_in = [
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=1, minute=57)),
            {'in': 3, 'out': 1}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=1, minute=58)),
            {'in': 9, 'out': 2}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=1, minute=59)),
            {'in': 6, 'out': 6}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=2, minute=0)),
            {'in': 4, 'out': 7}),
        Event(
            aware_dt_from_args(dict(year=2015, month=3, day=14, hour=2, minute=1)),
            {'in': 5, 'out': 9}),
    ]

    def record(event):
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS['{0}'.format(ms_from_dt(event.timestamp()))] = event

    source_stream = Stream()
    aggregation = {
        'in_avg': {'in': Functions.avg()},
        'out_avg': {'out': Functions.avg()},
    }
    (
        Pipeline()
        .from_source(source_stream)
        .window_by('1h')
        .emit_on('eachEvent')
        .aggregate(aggregation)
        .as_time_range_events(dict(alignment='lag'))
        .to(EventOut, record)
    )

    for evt in events_in:
        source_stream.add_event(evt)

    self.assertEqual(RESULTS.get('1426294800000').get('in_avg'), 6)
    self.assertEqual(RESULTS.get('1426294800000').get('out_avg'), 3)
    self.assertEqual(RESULTS.get('1426298400000').get('in_avg'), 4.5)
    self.assertEqual(RESULTS.get('1426298400000').get('out_avg'), 8)
def test_event_to_tre_conversion(self):
    """test converting Event objects to TimeRangeEvent."""
    # pylint: disable=missing-docstring
    stream1 = Stream()

    def verify_front(event):
        self.assertEqual(ms_from_dt(event.begin()), 1426316400000)
        self.assertEqual(ms_from_dt(event.end()), 1426320000000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream1)
        .as_time_range_events(dict(alignment='front', duration='1h'))
        .to(EventOut, verify_front)
    )
    stream1.add_event(self._event)

    stream2 = Stream()

    def verify_center(event):
        self.assertEqual(ms_from_dt(event.begin()), 1426314600000)
        self.assertEqual(ms_from_dt(event.end()), 1426318200000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream2)
        .as_time_range_events(dict(alignment='center', duration='1h'))
        .to(EventOut, verify_center)
    )
    stream2.add_event(self._event)

    stream3 = Stream()

    def verify_behind(event):
        self.assertEqual(ms_from_dt(event.begin()), 1426312800000)
        self.assertEqual(ms_from_dt(event.end()), 1426316400000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream3)
        .as_time_range_events(dict(alignment='behind', duration='1h'))
        .to(EventOut, verify_behind)
    )
    stream3.add_event(self._event)
def test_linear_stream_limit(self):
    """Test streaming on linear fill with limiter"""
    # Sets up a state where we stop seeing good data on a linear fill.
    # The Taker is used both to limit the number of results and to
    # verify that cached events get emitted.

    def on_collection(collection, window_key, group_by):  # pylint: disable=unused-argument
        """the callback"""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    events = [
        Event(1400425947000, 1),
        Event(1400425948000, 2),
        Event(1400425949000, dict(value=None)),
        Event(1400425950000, 3),
        Event(1400425951000, dict(value=None)),
        Event(1400425952000, dict(value=None)),
        Event(1400425953000, dict(value=None)),
        Event(1400425954000, dict(value=None)),
    ]

    # error state first - the last 4 events won't be emitted.
    stream = Stream()
    (
        Pipeline()
        .from_source(stream)
        .fill(method='linear', field_spec='value')
        .to(CollectionOut, on_collection)
    )
    for evt in events:
        stream.add_event(evt)
    self.assertEqual(RESULTS.size(), 4)

    # stopping the stream flushes events "stuck" in the cache
    stream.stop()
    self.assertEqual(RESULTS.size(), 8)

    # fill_limit=3 means the 7th event (after 3 are cached) flushes
    # the cache, and the 8th is emitted immediately because the fill
    # state resets to "have not seen a valid value yet", so invalid
    # events pass through uncached.
    stream = Stream()
    (
        Pipeline()
        .from_source(stream)
        .fill(method='linear', fill_limit=3, field_spec='value')
        .to(CollectionOut, on_collection)
    )
    for evt in events:
        stream.add_event(evt)
    self.assertEqual(RESULTS.size(), 8)