def test_timed_window_relaxed(self):
    """ Test relaxed timed windows emit batches every interval """
    # NOTE: docstring previously said "batch windows of fixed sizes",
    # copy-pasted from test_fixed_size; this test exercises interval=1
    # with relaxed=True.
    from minibatch import streaming

    def consumer():
        # note the stream decorator blocks the consumer and runs the decorated
        # function asynchronously upon the window criteria is satisfied
        @streaming('test', interval=1, relaxed=True, keep=True)
        def myprocess(window):
            try:
                db = setup(self.url)
                db.processed.insert_one({'data': window.data or {}})
            except Exception as e:
                print(e)
            return window

    # start stream consumer
    proc = Process(target=consumer)
    proc.start()
    # fill stream
    stream = Stream.get_or_create('test')
    for i in range(10):
        stream.append({'index': i})
        sleep(.5)
    # give it some time to process
    sleep(5)
    proc.terminate()
    # expect at least 5 entries (10 x .5 = 5 seconds), each of length 1-2
    data = list(doc for doc in self.db.processed.find())
    count = len(data)
    self.assertGreater(count, 5)
    self.assertTrue(all(len(w) >= 2 for w in data))
def _do_test_slow_emitfn(self, workers=None, expect_fail=None, timeout=None):
    """ Test slow batch windows work properly using {workers} workers """
    from minibatch import streaming
    # fill the {workers} placeholder in the docstring so test reports show
    # the actual worker count; on later calls the placeholder is already
    # substituted and .format() is a no-op
    MiniBatchTests._do_test_slow_emitfn.__doc__ = MiniBatchTests._do_test_slow_emitfn.__doc__.format(
        workers=workers)

    def consumer(workers):
        # 'self' (free variable) and 'workers' (parameter) are both visible
        # to locals() here, which the format string relies on
        logger.debug("starting consumer on={self.url} workers={workers}".format(**locals()))
        url = str(self.url)
        # note the stream decorator blocks the consumer and runs the decorated
        # function asynchronously upon the window criteria is satisfied
        @streaming('test', size=2, keep=True, url=self.url, max_workers=workers)
        def myprocess(window):
            logger.debug("*** processing {}".format(window.data))
            from minibatch import connectdb
            try:
                # presumably ~5 seconds per window — this is what makes the
                # emit function "slow"; verify sleepdot's unit is seconds
                sleepdot(5)
                db = connectdb(url=url)
                db.processed.insert_one({'data': window.data or {}})
            except Exception as e:
                logger.error(e)
                raise
            return window

    def check():
        # expect 5 entries, each of length 2
        data = list(doc for doc in self.db.processed.find())
        count = len(data)
        logger.debug("data={}".format(data))
        self.assertEqual(count, 5)
        self.assertTrue(all(len(w) == 2 for w in data))

    # start stream consumer
    # -- use just one worker, we expect to fail
    proc = Process(target=consumer, args=(workers,))
    proc.start()
    # fill stream
    stream = Stream.get_or_create('test', url=self.url)
    for i in range(10):
        stream.append({'index': i})
    # give it some time to process
    logger.debug("waiting")
    # note it takes at least 25 seconds using 1 worker (5 windows, 5 seconds)
    # so we expect to fail
    self.sleep(12)
    proc.terminate()
    if expect_fail:
        with self.assertRaises(AssertionError):
            check()
    else:
        check()
    # wait for everything to terminate, avoid stream corruption in next test
    self.sleep(timeout)
def test_stream(self):
    """ Every message appended to a stream lands in the buffer """
    test_stream = Stream.get_or_create('test')
    for message in ({'foo': 'bar1'}, {'foo': 'bar2'}):
        test_stream.append(message)
    # both appended messages should now be buffered
    buffered = [doc for doc in Buffer.objects.all()]
    self.assertEqual(len(buffered), 2)
def test_buffer_cleaned(self):
    """ Consumed messages are removed from the buffer by the emitter """
    stream = Stream.get_or_create('test', url=self.url)
    for message in ({'foo': 'bar1'}, {'foo': 'bar2'}):
        stream.append(message)
    emitter = CountWindow('test')
    # run two emit cycles so both buffered messages are consumed
    emitter._run_once()
    emitter._run_once()
    leftover = [doc for doc in Buffer.objects.filter()]
    self.assertEqual(len(leftover), 0)
def producer(data):
    """Append *data* to the 'test' stream, tagged with this process's pid.

    Sleeps a random fraction of a second first so that messages from
    concurrent producers spread across multiple time windows.
    """
    import os
    import time
    import random
    # sleep 0-0.9s to simulate multiple time windows
    # FIX: was random.randrange(0, 1, 1) / 10.0, which is always 0 and
    # therefore never slept at all
    time.sleep(random.randrange(0, 10) / 10.0)
    data.update({'pid': os.getpid()})
    # separate connection alias for the producer process — presumably to
    # avoid clashing with the default connection; confirm against connectdb
    connectdb(alias='producer')
    stream_name = 'test'
    stream = Stream.get_or_create(stream_name)
    print("producing ... {}".format(data))
    stream.append(data)
def test_fixed_size(self):
    """ Test batch windows of fixed sizes work ok """
    from minibatch import streaming

    def consumer(q):
        logger.debug("starting consumer on {self.url}".format(**locals()))
        url = str(self.url)

        # the @streaming decorator blocks this subprocess and invokes the
        # decorated function each time a complete window of 2 is available
        @streaming('test', size=2, keep=True, url=self.url, queue=q)
        def myprocess(window):
            logger.debug("*** processing")
            try:
                db = connectdb(url)
                db.processed.insert_one({'data': window.data or {}})
            except Exception as e:
                print(e)
                raise

    # launch the consumer subprocess, then feed the stream
    stop_q = Queue()
    stream = Stream.get_or_create('test', url=self.url)
    worker = Process(target=consumer, args=(stop_q,))
    worker.start()
    for idx in range(10):
        stream.append({'index': idx})
    # allow the consumer time to drain all windows
    logger.debug("waiting")
    self.sleep(10)
    stop_q.put(True)  # stop @streaming
    worker.join()
    # 10 messages at window size 2 => exactly 5 processed documents
    docs = [doc for doc in self.db.processed.find()]
    self.assertEqual(len(docs), 5)
    self.assertTrue(all(len(w) == 2 for w in docs))
def test_timed_window(self):
    """ Test timed windows work ok """
    from minibatch import streaming

    def consumer(q):
        url = str(self.url)

        # the @streaming decorator blocks this subprocess and invokes the
        # decorated function once per elapsed 1-second interval
        @streaming('test', interval=1, relaxed=False, keep=True, queue=q)
        def myprocess(window):
            try:
                db = connectdb(url=url)
                db.processed.insert_one({'data': window.data or {}})
            except Exception as e:
                print(e)
            return window

    # launch the consumer subprocess, then trickle messages into the stream
    stop_q = Queue()
    stream = Stream.get_or_create('test', url=self.url)
    worker = Process(target=consumer, args=(stop_q,))
    worker.start()
    for idx in range(10):
        stream.append({'index': idx})
        self.sleep(.5)
    # give it some time to process
    self.sleep(5)
    stop_q.put(True)
    worker.join()
    # expect at least 5 entries (10 x .5 = 5 seconds), each of length 1-2
    docs = [doc for doc in self.db.processed.find()]
    self.assertGreater(len(docs), 5)
    self.assertTrue(all(len(w) >= 2 for w in docs))
def stream(self):
    """Return this object's Stream, creating and caching it on first access."""
    if not self._stream:
        self._stream = Stream.get_or_create(self.stream_name, url=self._stream_url)
    return self._stream