def test_batch(self):
    """Submit a three-job batch, cancel it mid-flight, and verify the state tally.

    Jobs: one immediate failure ('false'), one short sleep, one long sleep.
    The long sleep is the only job that can still be cancelled by the time
    cancel() is issued.
    """
    builder = self.client.create_batch()
    job_false = builder.create_job('ubuntu:18.04', ['false'])
    job_short = builder.create_job('ubuntu:18.04', ['sleep', '1'])
    builder.create_job('ubuntu:18.04', ['sleep', '30'])
    batch = builder.submit()

    # Wait for the two quick jobs so the cancel can only affect the long one.
    job_false.wait()
    job_short.wait()
    batch.cancel()
    bstatus = batch.wait()

    assert len(bstatus['jobs']) == 3, bstatus

    state_count = collections.Counter(j['state'] for j in bstatus['jobs'])
    n_cancelled = state_count['Cancelled']
    n_complete = (state_count['Error']
                  + state_count['Failed']
                  + state_count['Success'])
    assert n_cancelled <= 1, bstatus
    assert n_cancelled + n_complete == 3, bstatus

    # Exactly one job (the 'false' one) must have exited nonzero.
    n_failed = sum(1
                   for j in bstatus['jobs']
                   if j['state'] in ('Failed', 'Error')
                   and Job._get_exit_code(j, 'main') > 0)
    assert n_failed == 1, bstatus
def test_missing_parent_is_400(client):
    """A job referencing a parent that was never submitted must be rejected.

    The whole build-and-submit sequence stays inside the try so the expected
    ValueError is caught no matter which step raises it.
    """
    try:
        batch = client.create_batch()
        phantom = aioclient.Job.unsubmitted_job(batch._async_builder, 10000)
        phantom = Job.from_async_job(phantom)
        batch.create_job('ubuntu:18.04',
                         command=['echo', 'head'],
                         parents=[phantom])
        batch.submit()
    except ValueError as err:
        assert re.search('parents with invalid job ids', str(err))
        return
    # No error was raised: the invalid parent slipped through.
    assert False
def _run(self, pipeline, dry_run, verbose, delete_scratch_on_exit):  # pylint: disable-msg=R0915
    """Compile a pipeline into a batch-service DAG and execute it.

    Each task becomes one batch job running its shell command; inter-task
    files are staged through a per-run GCS scratch directory.  In dry_run
    mode the generated shell commands are printed instead of submitted.
    Raises PipelineException if any job exits nonzero.
    """
    start = time.time()
    bucket = self._batch_client.bucket
    # Per-run scratch namespace: remote (GCS) and in-container local paths.
    subdir_name = 'pipeline-{}'.format(uuid.uuid4().hex[:12])
    remote_tmpdir = f'gs://{bucket}/pipeline/{subdir_name}'
    local_tmpdir = f'/io/pipeline/{subdir_name}'
    default_image = 'ubuntu:latest'
    attributes = pipeline.attributes
    if pipeline.name is not None:
        attributes['name'] = pipeline.name
    batch = self._batch_client.create_batch(attributes=attributes)
    n_jobs_submitted = 0
    # Tracks whether anything was written to remote_tmpdir (gates cleanup job).
    used_remote_tmpdir = False
    task_to_job_mapping = {}   # task -> submitted job, for wiring parents
    jobs_to_command = {}       # job -> shell command (rekeyed by id after submit)
    commands = []              # dry-run accumulator
    # 'set -ex' when verbose, 'set -e' otherwise.
    bash_flags = 'set -e' + ('x' if verbose else '') + '; '
    activate_service_account = 'gcloud -q auth activate-service-account ' \
        '--key-file=/gsa-key/privateKeyData'

    def copy_input(r):
        # (src, dst) pairs to localize a task's input file.
        if isinstance(r, InputResourceFile):
            return [(r._input_path, r._get_path(local_tmpdir))]
        assert isinstance(r, TaskResourceFile)
        return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))]

    def copy_internal_output(r):
        # (src, dst) pairs to upload a task output consumed by another task.
        assert isinstance(r, TaskResourceFile)
        return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))]

    def copy_external_output(r):
        # (src, dst) pairs for user-requested final output destinations.
        if isinstance(r, InputResourceFile):
            return [(r._input_path, dest) for dest in r._output_paths]
        assert isinstance(r, TaskResourceFile)
        return [(r._get_path(local_tmpdir), dest) for dest in r._output_paths]

    # Pipeline-level inputs that the user also wants copied to output paths
    # are handled by one dedicated gsutil job up front.
    write_external_inputs = [x for r in pipeline._input_resources for x in copy_external_output(r)]
    if write_external_inputs:
        def _cp(src, dst):
            return f'gsutil -m cp -R {src} {dst}'
        write_cmd = bash_flags + activate_service_account + ' && ' + \
            ' && '.join([_cp(*files) for files in write_external_inputs])
        if dry_run:
            commands.append(write_cmd)
        else:
            j = batch.create_job(image='google/cloud-sdk:237.0.0-alpine',
                                 command=['/bin/bash', '-c', write_cmd],
                                 attributes={'name': 'write_external_inputs'})
            jobs_to_command[j] = write_cmd
            n_jobs_submitted += 1

    for task in pipeline._tasks:
        inputs = [x for r in task._inputs for x in copy_input(r)]
        outputs = [x for r in task._internal_outputs for x in copy_internal_output(r)]
        if outputs:
            # An internal output means remote_tmpdir will hold data.
            used_remote_tmpdir = True
        outputs += [x for r in task._external_outputs for x in copy_external_output(r)]
        resource_defs = [r._declare(directory=local_tmpdir) for r in task._mentioned]
        if task._image is None:
            if verbose:
                print(f"Using image '{default_image}' since no image was specified.")
        make_local_tmpdir = f'mkdir -p {local_tmpdir}/{task._uid}/; '
        defs = '; '.join(resource_defs) + '; ' if resource_defs else ''
        task_command = [cmd.strip() for cmd in task._command]
        # Full job script: flags, mkdir, resource env defs, then the task steps.
        cmd = bash_flags + make_local_tmpdir + defs + " && ".join(task_command)
        if dry_run:
            commands.append(cmd)
            continue
        # pipeline._tasks is assumed topologically ordered: every dependency
        # must already be in task_to_job_mapping here.
        parents = [task_to_job_mapping[t] for t in task._dependencies]
        attributes = {'task_uid': task._uid}
        if task.name:
            attributes['name'] = task.name
        attributes.update(task.attributes)
        resources = {'requests': {}}
        if task._cpu:
            resources['requests']['cpu'] = task._cpu
        if task._memory:
            resources['requests']['memory'] = task._memory
        j = batch.create_job(image=task._image if task._image else default_image,
                             command=['/bin/bash', '-c', cmd],
                             parents=parents,
                             attributes=attributes,
                             resources=resources,
                             input_files=inputs if len(inputs) > 0 else None,
                             output_files=outputs if len(outputs) > 0 else None,
                             pvc_size=task._storage)
        n_jobs_submitted += 1
        task_to_job_mapping[task] = j
        jobs_to_command[j] = cmd

    if dry_run:
        print("\n\n".join(commands))
        return

    # Cleanup job: removes the GCS scratch dir; always_run so it fires even
    # when upstream jobs fail.
    if delete_scratch_on_exit and used_remote_tmpdir:
        parents = list(jobs_to_command.keys())
        rm_cmd = f'gsutil -m rm -r {remote_tmpdir}'
        cmd = bash_flags + f'{activate_service_account} && {rm_cmd}'
        j = batch.create_job(
            image='google/cloud-sdk:237.0.0-alpine',
            command=['/bin/bash', '-c', cmd],
            parents=parents,
            attributes={'name': 'remove_tmpdir'},
            always_run=True)
        jobs_to_command[j] = cmd
        n_jobs_submitted += 1

    print(f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - start, 3)} seconds:')
    start = time.time()
    batch = batch.submit()
    print(f'Submitted batch {batch.id} with {n_jobs_submitted} jobs in {round(time.time() - start, 3)} seconds:')
    # Rekey by job id for reporting after submission.
    jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}
    if verbose:
        print(f'Submitted batch {batch.id} with {n_jobs_submitted} jobs in {round(time.time() - start, 3)} seconds:')
        for jid, cmd in jobs_to_command.items():
            print(f'{jid}: {cmd}')
    status = batch.wait()
    if status['state'] == 'success':
        print('Pipeline completed successfully!')
        return
    # Collect jobs whose per-container exit-code dict has any nonzero entry.
    failed_jobs = [((j['batch_id'], j['job_id']), j['exit_code'])
                   for j in status['jobs']
                   if 'exit_code' in j and any([ec != 0 for _, ec in j['exit_code'].items()])]
    fail_msg = ''
    for jid, ec in failed_jobs:
        # NOTE(review): ec is the exit-code dict from the job status; confirm
        # Job.exit_code accepts that shape (vs. a full status dict).
        ec = Job.exit_code(ec)
        job = self._batch_client.get_job(*jid)
        log = job.log()
        name = job.status()['attributes'].get('name', None)
        # NOTE(review): jobs_to_command was rekeyed by j.id above, while jid is
        # a (batch_id, job_id) tuple — verify these key forms actually match.
        fail_msg += (
            f"Job {jid} failed with exit code {ec}:\n"
            f" Task name:\t{name}\n"
            f" Command:\t{jobs_to_command[jid]}\n"
            f" Log:\t{log}\n")
    raise PipelineException(fail_msg)
def batch_status_exit_codes(batch_status):
    """Return the per-job exit codes for every job in a batch status dict."""
    codes = []
    for job_status in batch_status['jobs']:
        codes.append(Job._get_exit_codes(job_status))
    return codes