import unittest
from shlex import quote as shq  # assumed alias: shq is used below to quote file paths

# Assumed imports: in this codebase Pipeline, BatchBackend, and
# PipelineException are expected to come from hailtop.pipeline; adjust to
# match the actual project layout.
from hailtop.pipeline import BatchBackend, Pipeline, PipelineException

# gcs_input_dir and gcs_output_dir are GCS path fixtures defined elsewhere in
# this test module; grouped() and arg_max() are sketched after the test class.


class BatchTests(unittest.TestCase):
    def setUp(self):
        self.backend = BatchBackend('test')

    def tearDown(self):
        self.backend.close()

    def pipeline(self):
        # Fresh Pipeline with a small default image and some batch attributes.
        return Pipeline(backend=self.backend,
                        default_image='google/cloud-sdk:237.0.0-alpine',
                        attributes={'foo': 'a', 'bar': 'b'})

    def test_single_task_no_io(self):
        p = self.pipeline()
        t = p.new_task()
        t.command('echo hello')
        assert p.run().status()['state'] == 'success'

    def test_single_task_input(self):
        p = self.pipeline()
        input = p.read_input(f'{gcs_input_dir}/hello.txt')
        t = p.new_task()
        t.command(f'cat {input}')
        assert p.run().status()['state'] == 'success'

    def test_single_task_input_resource_group(self):
        p = self.pipeline()
        input = p.read_input_group(foo=f'{gcs_input_dir}/hello.txt')
        t = p.new_task()
        t.storage('0.25Gi')
        t.command(f'cat {input.foo}')
        assert p.run().status()['state'] == 'success'

    def test_single_task_output(self):
        p = self.pipeline()
        t = p.new_task(attributes={'a': 'bar', 'b': 'foo'})
        t.command(f'echo hello > {t.ofile}')
        assert p.run().status()['state'] == 'success'

    def test_single_task_write_output(self):
        p = self.pipeline()
        t = p.new_task()
        t.command(f'echo hello > {t.ofile}')
        p.write_output(t.ofile, f'{gcs_output_dir}/test_single_task_output.txt')
        assert p.run().status()['state'] == 'success'

    def test_single_task_resource_group(self):
        p = self.pipeline()
        t = p.new_task()
        t.declare_resource_group(output={'foo': '{root}.foo'})
        t.command(f'echo "hello" > {t.output.foo}')
        assert p.run().status()['state'] == 'success'

    def test_single_task_write_resource_group(self):
        p = self.pipeline()
        t = p.new_task()
        t.declare_resource_group(output={'foo': '{root}.foo'})
        t.command(f'echo "hello" > {t.output.foo}')
        p.write_output(
            t.output,
            f'{gcs_output_dir}/test_single_task_write_resource_group')
        p.write_output(
            t.output.foo,
            f'{gcs_output_dir}/test_single_task_write_resource_group_file.txt')
        assert p.run().status()['state'] == 'success'

    def test_multiple_dependent_tasks(self):
        output_file = f'{gcs_output_dir}/test_multiple_dependent_tasks.txt'
        p = self.pipeline()
        t = p.new_task()
        t.command(f'echo "0" >> {t.ofile}')
        # Chain tasks so each one reads the previous task's output.
        for i in range(1, 3):
            t2 = p.new_task()
            t2.command(f'echo "{i}" > {t2.tmp1}')
            t2.command(f'cat {t.ofile} {t2.tmp1} > {t2.ofile}')
            t = t2
        p.write_output(t.ofile, output_file)
        assert p.run().status()['state'] == 'success'

    def test_specify_cpu(self):
        p = self.pipeline()
        t = p.new_task()
        t.cpu('0.5')
        t.command(f'echo "hello" > {t.ofile}')
        assert p.run().status()['state'] == 'success'

    def test_specify_memory(self):
        p = self.pipeline()
        t = p.new_task()
        t.memory('100M')
        t.command(f'echo "hello" > {t.ofile}')
        assert p.run().status()['state'] == 'success'

    def test_scatter_gather(self):
        p = self.pipeline()
        for i in range(3):
            t = p.new_task(name=f'foo{i}')
            t.command(f'echo "{i}" > {t.ofile}')
        merger = p.new_task()
        merger.command('cat {files} > {ofile}'.format(
            files=' '.join([t.ofile for t in sorted(p.select_tasks('foo'),
                                                    key=lambda x: x.name,
                                                    reverse=True)]),
            ofile=merger.ofile))
        assert p.run().status()['state'] == 'success'

    def test_file_name_space(self):
        p = self.pipeline()
        input = p.read_input(f'{gcs_input_dir}/hello (foo) spaces.txt')
        t = p.new_task()
        t.command(f'cat {input} > {t.ofile}')
        p.write_output(t.ofile, f'{gcs_output_dir}/hello (foo) spaces.txt')
        assert p.run().status()['state'] == 'success'

    def test_dry_run(self):
        p = self.pipeline()
        t = p.new_task()
        t.command(f'echo hello > {t.ofile}')
        p.write_output(t.ofile, f'{gcs_output_dir}/test_single_task_output.txt')
        p.run(dry_run=True)

    def test_verbose(self):
        p = self.pipeline()
        input = p.read_input(f'{gcs_input_dir}/hello.txt')
        t = p.new_task()
        t.command(f'cat {input}')
        p.write_output(input, f'{gcs_output_dir}/hello.txt')
        assert p.run(verbose=True).status()['state'] == 'success'

    def test_benchmark_lookalike_workflow(self):
        p = self.pipeline()

        setup_tasks = []
        for i in range(10):
            t = p.new_task(f'setup_{i}').cpu(0.1)
            t.command(f'echo "foo" > {t.ofile}')
            setup_tasks.append(t)

        tasks = []
        for i in range(500):
            t = p.new_task(f'create_file_{i}').cpu(0.1)
            t.command(f'echo {setup_tasks[i % len(setup_tasks)].ofile} > {t.ofile}')
            t.command(f'echo "bar" >> {t.ofile}')
            tasks.append(t)

        combine = p.new_task('combine_output').cpu(0.1)
        # Concatenate the outputs in chunks so no single generated command
        # line exceeds the OS argument-length limit.
        for task_group in grouped(arg_max(), tasks):
            combine.command(
                f'cat {" ".join(shq(t.ofile) for t in task_group)} >> {combine.ofile}')
        p.write_output(combine.ofile,
                       f'{gcs_output_dir}/pipeline_benchmark_test.txt')
    def test_failed_job_error_msg(self):
        # A task whose command exits non-zero should cause run() to raise.
        with self.assertRaises(PipelineException):
            p = self.pipeline()
            t = p.new_task()
            t.command('false')
            p.run()
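
# The benchmark test above calls grouped() and arg_max(), which are not
# defined in this excerpt. The sketch below is a minimal stand-in under
# assumed semantics: grouped(n, xs) yields chunks of at most n items, and
# arg_max() derives a safe chunk size from the kernel's ARG_MAX limit. The
# 256-byte-per-path divisor is a hypothetical estimate, not taken from the
# original code.
import subprocess


def arg_max():
    # `getconf ARG_MAX` reports the maximum command-line length in bytes;
    # dividing by an assumed 256 bytes per file path gives a chunk size that
    # keeps each generated `cat` command under the limit.
    return int(subprocess.check_output(['getconf', 'ARG_MAX']).strip()) // 256


def grouped(n, xs):
    # Yield successive slices of xs containing at most n elements each.
    for i in range(0, len(xs), n):
        yield xs[i:i + n]


if __name__ == '__main__':
    # Standard entry point so the suite can be run directly with Python.
    unittest.main()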