def test_fork_and_join1(self):
    """Chain a balancing fork, a replicating fork and a join, then tally results.

    Items are ``(stage, string)`` tuples.  ``manipulate`` bumps the character
    at index ``stage`` by one code point, ``dont_manipulate`` leaves the
    string alone; both advance ``stage`` by one.  Over two stages the four
    possible outcomes for ``'string'`` are ``'ttring'`` (bumped at stage 0
    only), ``'turing'`` (bumped twice), ``'suring'`` (bumped at stage 1 only)
    and ``'string'`` (never bumped).
    """
    self.counts = Counter()

    def manipulate(arg):
        stage, string = arg
        chars = [*string]
        chars[stage] = chr(ord(chars[stage]) + 1)
        return stage + 1, ''.join(chars)

    def dont_manipulate(arg):
        stage, string = arg
        return stage + 1, string

    def count(arg):
        _stage, word = arg
        self.counts.update([word])
        # Only verify once all four distinct outcomes have been observed.
        if len(self.counts) != 4:
            return
        for expected in ('ttring', 'turing', 'suring', 'string'):
            self.assertEqual(self.counts[expected], 1)

    branch_steps = (manipulate, dont_manipulate)

    pipeline = Pipeline()
    pipeline.add(BalancingFork(2))
    pipeline.add(Pipe())
    pipeline.add(*[Processor(step) for step in branch_steps])
    pipeline.add(ReplicatingFork(2))
    pipeline.add(*[Processor(step) for step in branch_steps * 2])
    pipeline.add(Join(4))
    pipeline.add(Processor(count))
    pipeline.run([(0, 'string')] * 2)
def test_pipeline_basic(self):
    """Run a one-processor pipeline and check the item reaches it unchanged."""

    def finalize(arg):
        # The only input fed to the pipeline below is 3.
        self.assertEqual(arg, 3)

    pipeline = Pipeline()
    sink = Processor(finalize)
    pipeline.add(sink)
    pipeline.run([3])
def main():
    """Build the n-gram / classifier pipeline and run it on ``sms_data.txt``.

    The pipeline is: one ``generate_ngrams`` processor, a three-way
    ``ReplicatingFork``, then one processor per training function.  The
    whole file, split on ``'\\n'``, is passed in as a single work item.
    """
    trainers = (train_decision_tree, train_random_forest, train_k_neighbors)

    pipeline = Pipeline()
    pipeline.add(Processor(generate_ngrams))
    pipeline.add(ReplicatingFork(3))
    pipeline.add(*[Processor(trainer) for trainer in trainers])

    with open("sms_data.txt", "r", encoding='latin-1') as handle:
        contents = handle.read()
        lines = contents.split('\n')
        # The list of lines is one input item, not one item per line.
        pipeline.run([lines])
def test_pipeline_single_processor(self):
    """Send 3 through an incrementing processor and a pipe; expect 4 at the end."""

    def job(arg):
        incremented = arg + 1
        return incremented

    def finalize(arg):
        self.assertEqual(arg, 4)

    pipeline = Pipeline()
    pipeline.add(Processor(job))
    pipeline.add(Pipe())
    pipeline.add(Processor(finalize))
    pipeline.run([3])
def test_automatic_open_close(self):
    """Check ``pl.closed`` is true both before and after a plain ``run`` call.

    ``open``/``close`` are never called explicitly here; the test only
    observes the ``closed`` flag around ``run``.
    """
    import math

    def square_root(arg):
        return math.sqrt(arg)

    def cube(arg):
        return pow(arg, 3)

    pipeline = Pipeline()
    pipeline.add(ReplicatingFork(2))
    pipeline.add(Processor(square_root), Processor(cube))
    pipeline.add(Processor(square_root), Pipe())
    pipeline.add(Pipe())
    pipeline.add(Join(2))
    pipeline.add(Processor(print))

    # Before running: nothing has opened the pipeline yet.
    self.assertTrue(pipeline.closed, 'Pipeline should be closed')

    pipeline.run([2, 7, 9])

    # After running: the pipeline must report closed again.
    self.assertTrue(pipeline.closed, 'Pipeline should be closed')
def test_balancing_forks(self):
    """Feed two items through a two-way BalancingFork and compare branch tallies.

    Each branch replaces its input with a fixed label (``'job1'`` or
    ``'job2'``); the final processor counts the labels it receives, and the
    two counts are expected to be equal.
    """
    self.counts = Counter()

    def job1(arg):
        return 'job1'

    def job2(arg):
        return 'job2'

    def finalize(arg):
        self.counts.update([arg])

    pipeline = Pipeline()
    pipeline.add(BalancingFork(2))
    pipeline.add(*[Processor(branch) for branch in (job1, job2)])
    pipeline.add(Join(2))
    pipeline.add(Processor(finalize))
    pipeline.run([False] * 2)

    first_branch_hits = self.counts['job1']
    second_branch_hits = self.counts['job2']
    self.assertEqual(first_branch_hits, second_branch_hits)
def test_allow_multiple_pipeline_runs(self):
    """Check that the same Pipeline instance can be run twice in a row.

    The last processor releases ``self.lock`` only when it receives the
    item ``'second'``, which is fed in by the second ``run`` call.  The test
    holds the lock before running, so the lock can only become available
    again if that second run actually delivered its item.
    """

    def job(arg):
        if arg == 'second':
            self.lock.release()

    self.lock = Lock()

    pl = Pipeline()
    pl.add(Processor(dummy_return_arg))
    pl.add(Pipe())
    pl.add(Processor(dummy_return_arg))
    pl.add(Pipe())
    pl.add(Processor(job))

    # Take the lock up front; only ``job`` can make it available again.
    self.lock.acquire()
    pl.run(['first'])
    pl.run(['second'])

    # Assert on the RESULT of the non-blocking acquire.  Checking
    # ``lock.locked()`` afterwards proves nothing: once an acquire has been
    # attempted the lock is held either way (by the earlier acquire if the
    # attempt failed, by this one if it succeeded), so it is always True.
    # NOTE(review): this assumes ``run`` has finished processing when it
    # returns and that ``job`` releases this very lock object -- confirm.
    released_by_job = self.lock.acquire(blocking=False)
    self.assertTrue(released_by_job,
                    'The second pipeline run was not successful')
def test_processor_pipe_mix(self):
    """Replicate one item three ways through a Processor and two plain Pipes.

    The item is a ``(stage, string)`` tuple.  ``manipulate`` bumps the
    character at index ``stage`` by one code point (``'string'`` becomes
    ``'ttring'``); the two ``Pipe`` branches are expected to pass
    ``'string'`` through untouched.  The final processor tallies the strings
    and verifies the tally once three items have arrived.
    """
    self.counts = Counter()

    def count(arg):
        _, string = arg
        self.counts[string] += 1
        # Trigger on the number of ITEMS seen, not on len(self.counts):
        # len() of a Counter is the number of distinct keys, and only two
        # distinct strings are expected here, so a ``len(...) == 3`` guard
        # would never fire and the assertions below would never run.
        if sum(self.counts.values()) == 3:
            self.assertEqual(self.counts['ttring'], 1)
            self.assertEqual(self.counts['string'], 2)

    def manipulate(arg):
        stage, string = arg
        chars = list(string)
        chars[stage] = chr(ord(chars[stage]) + 1)
        return (stage + 1, ''.join(chars))

    pl = Pipeline()
    pl.add(ReplicatingFork(3))
    pl.add(Processor(manipulate), Pipe(), Pipe())
    pl.add(Join(3))
    pl.add(Processor(count))
    pl.run([(0, 'string')])
def test_open_close_no_with(self):
    """Drive ``open()`` / ``close()`` by hand and check the state flags.

    Sequence checked: closed after construction, opened after ``open`` +
    ``run``, closed after ``close``, opened again after a second ``open``.
    """
    import math

    def square_root(arg):
        return math.sqrt(arg)

    def cube(arg):
        return pow(arg, 3)

    pipeline = Pipeline()
    pipeline.add(ReplicatingFork(2))
    pipeline.add(Processor(square_root), Processor(cube))
    pipeline.add(Processor(square_root), Pipe())
    pipeline.add(Join(2))
    pipeline.add(Processor(print))
    self.assertTrue(pipeline.closed, 'Pipeline should be closed')

    pipeline.open()
    pipeline.run([16, 3, 81])
    # An explicitly opened pipeline stays open across run().
    self.assertTrue(pipeline.opened, 'Pipeline should be open')

    pipeline.close()
    self.assertTrue(pipeline.closed, 'Pipeline should be closed')

    # Re-open and intentionally never close: the expectation recorded by
    # the test's author is that daemon children get cleaned up regardless.
    pipeline.open()
    self.assertTrue(pipeline.opened, 'Pipeline should be open')
def test_processor_shared_memory(self):
    """Have three chained processors read and write one shared-memory segment.

    Stage 1 writes bytes ``0, 11, 22, 33`` to the start of the segment,
    stage 2 overwrites byte 0 with ``44``, and stage 3 verifies the four
    bytes before closing and unlinking the segment.

    NOTE(review): ``common_memory`` is not defined in this function;
    presumably it is the segment name made available by
    ``Pipeline(shared_memory_amt=10)`` -- confirm.
    """

    def worker1_task(args):
        segment = shared_memory.SharedMemory(name=common_memory)
        segment.buf[:4] = bytearray([0, 11, 22, 33])
        segment.close()
        return args

    def worker2_task(args):
        segment = shared_memory.SharedMemory(name=common_memory)
        segment.buf[0] = 44
        segment.close()
        return args

    def cleanup_task(args):
        segment = shared_memory.SharedMemory(name=common_memory)
        from array import array
        print(array('b', segment.buf[:4]))
        # Byte 0 comes from worker2_task, bytes 1-3 from worker1_task.
        for index, expected in enumerate((44, 11, 22, 33)):
            assert segment.buf[index] == expected
        segment.close()
        segment.unlink()
        return args

    pipeline = Pipeline(shared_memory_amt=10)
    for task in (worker1_task, worker2_task, cleanup_task):
        pipeline.add(Processor(task))
    pipeline.run(['abc'])