async def create(billing_project: str,
                 deploy_config: Optional[DeployConfig] = None,
                 session: Optional[httpx.ClientSession] = None,
                 headers: Optional[Dict[str, str]] = None,
                 _token: Optional[str] = None,
                 token_file: Optional[str] = None):
    if not deploy_config:
        deploy_config = get_deploy_config()
    url = deploy_config.base_url('batch')
    if session is None:
        session = httpx.client_session()
    if headers is None:
        headers = {}
    if _token:
        headers['Authorization'] = f'Bearer {_token}'
    else:
        headers.update(service_auth_headers(deploy_config, 'batch', token_file=token_file))
    return BatchClient(billing_project=billing_project, url=url, session=session, headers=headers)
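# Minimal usage sketch (not part of the original source): it assumes a reachable
# Batch deployment and valid credentials; 'my-billing-project' is a placeholder.
#
#     async def example():
#         client = await create('my-billing-project')
#         try:
#             ...  # call client methods here
#         finally:
#             await client.close()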
def __init__(self, name, binding_host, port, leader, dbuf, aiofiles):
    self.deploy_config = get_deploy_config()
    self.app = web.Application(client_max_size=50 * 1024 * 1024)
    self.routes = web.RouteTableDef()
    self.workers = set()
    self.name = name
    self.binding_host = binding_host
    self.port = port
    self.dbuf = dbuf
    self.leader = leader
    self.leader_url = self.deploy_config.base_url(leader)
    self.aiofiles = aiofiles
    self.shuffle_create_lock = asyncio.Lock()
    self.app.add_routes([
        web.post('/s', self.create),
        web.post('/s/{session}', self.post),
        web.post('/s/{session}/get', self.get),
        web.post('/s/{session}/getmany', self.getmany),
        web.delete('/s/{session}', self.delete),
        web.post('/w', self.register_worker),
        web.get('/w', self.list_workers),
        web.get('/healthcheck', self.healthcheck),
    ])
    self.app.on_cleanup.append(self.cleanup)
def __init__(self, gcs_project=None, fs=None, deploy_config=None, session=None, headers=None, _token=None):
    if not deploy_config:
        self._deploy_config = get_deploy_config()
    else:
        self._deploy_config = deploy_config
    self.url = self._deploy_config.base_url('memory')
    self.objects_url = f'{self.url}/api/v1alpha/objects'
    self._session = session
    if fs is None:
        fs = GCS(blocking_pool=concurrent.futures.ThreadPoolExecutor(), project=gcs_project)
    self._fs = fs
    self._headers = {}
    if headers:
        self._headers.update(headers)
    if _token:
        self._headers['Authorization'] = f'Bearer {_token}'
    rest_authenticated_developers_only, web_authenticated_developers_only, \
    check_csrf_token
from web_common import setup_aiohttp_jinja2, setup_common_static_routes, render_template, \
    set_message

from .constants import BUCKET
from .github import Repo, FQBranch, WatchedBranch, UnwatchedBranch

with open(os.environ.get('HAIL_CI_OAUTH_TOKEN', 'oauth-token/oauth-token'), 'r') as f:
    oauth_token = f.read().strip()

log = logging.getLogger('ci')

uvloop.install()

deploy_config = get_deploy_config()

watched_branches = [
    WatchedBranch(index, FQBranch.from_short_str(bss), deployable)
    for (index, [bss, deployable]) in enumerate(json.loads(os.environ.get('HAIL_WATCHED_BRANCHES', '[]')))
]

routes = web.RouteTableDef()

start_time = datetime.datetime.now()


@routes.get('')
@routes.get('/')
@web_authenticated_developers_only()
async def index(request, userdata):  # pylint: disable=unused-argument
def __init__(self, deploy_config=None):
    if not deploy_config:
        deploy_config = get_deploy_config()
    self._deploy_config = deploy_config
    self._session = None
def __init__(self, *, deploy_config: Optional[DeployConfig] = None):
    if not deploy_config:
        deploy_config = get_deploy_config()
    self.deploy_config = deploy_config
    self.url = deploy_config.base_url('query')
    self._session: Optional[aiohttp.ClientSession] = None
def get_tokens_file():
    deploy_config = get_deploy_config()
    location = deploy_config.location()
    if location == 'external':
        return os.path.expanduser('~/.hail/tokens.json')
    return '/user-tokens/tokens.json'
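# Illustration only (not part of the original source): the two branches above
# resolve to the literals they return.
#
#     get_tokens_file()  # '~/.hail/tokens.json' (expanded) when location is 'external'
#     get_tokens_file()  # '/user-tokens/tokens.json' when running inside the cluster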
def __init__(self):
    self._deploy_config = get_deploy_config()
    self._session = None
def _run(self,
         batch: 'batch.Batch',
         dry_run: bool,
         verbose: bool,
         delete_scratch_on_exit: bool,
         wait: bool = True,
         open: bool = False,
         disable_progress_bar: bool = False,
         callback: Optional[str] = None,
         token: Optional[str] = None,
         **backend_kwargs):  # pylint: disable-msg=too-many-statements
    """Execute a batch.

    Warning
    -------
    This method should not be called directly. Instead, use :meth:`.batch.Batch.run`
    and pass :class:`.ServiceBackend` specific arguments as key-word arguments.

    Parameters
    ----------
    batch:
        Batch to execute.
    dry_run:
        If `True`, don't execute code.
    verbose:
        If `True`, print debugging output.
    delete_scratch_on_exit:
        If `True`, delete temporary directories with intermediate files.
    wait:
        If `True`, wait for the batch to finish executing before returning.
    open:
        If `True`, open the UI page for the batch.
    disable_progress_bar:
        If `True`, disable the progress bar.
    callback:
        If not `None`, a URL that will receive at most one POST request after the
        entire batch completes.
    token:
        If not `None`, a string used for idempotency of batch submission.
    """
    if backend_kwargs:
        raise ValueError(f'ServiceBackend does not support any of these keywords: {backend_kwargs}')

    build_dag_start = time.time()

    uid = uuid.uuid4().hex[:6]
    remote_tmpdir = f'gs://{self._bucket_name}/batch/{uid}'
    local_tmpdir = f'/io/batch/{uid}'

    default_image = 'ubuntu:18.04'

    attributes = copy.deepcopy(batch.attributes)
    if batch.name is not None:
        attributes['name'] = batch.name

    bc_batch = self._batch_client.create_batch(attributes=attributes,
                                               callback=callback,
                                               token=token,
                                               cancel_after_n_failures=batch._cancel_after_n_failures)

    n_jobs_submitted = 0
    used_remote_tmpdir = False

    job_to_client_job_mapping: Dict[_job.Job, bc.Job] = {}
    jobs_to_command = {}
    commands = []

    bash_flags = 'set -e' + ('x' if verbose else '')

    activate_service_account = 'gcloud -q auth activate-service-account ' \
                               '--key-file=/gsa-key/key.json'

    def copy_input(r):
        if isinstance(r, resource.InputResourceFile):
            return [(r._input_path, r._get_path(local_tmpdir))]
        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))]

    def copy_internal_output(r):
        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))]

    def copy_external_output(r):
        if isinstance(r, resource.InputResourceFile):
            return [(r._input_path, dest) for dest in r._output_paths]
        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return [(r._get_path(local_tmpdir), dest) for dest in r._output_paths]

    def symlink_input_resource_group(r):
        symlinks = []
        if isinstance(r, resource.ResourceGroup) and r._source is None:
            for name, irf in r._resources.items():
                src = irf._get_path(local_tmpdir)
                dest = f'{r._get_path(local_tmpdir)}.{name}'
                symlinks.append(f'ln -sf {shq(src)} {shq(dest)}')
        return symlinks

    write_external_inputs = [x for r in batch._input_resources for x in copy_external_output(r)]
    if write_external_inputs:
        def _cp(src, dst):
            return f'gsutil -m cp -R {shq(src)} {shq(dst)}'

        write_cmd = f'''
{bash_flags}
{activate_service_account}
{' && '.join([_cp(*files) for files in write_external_inputs])}
'''

        if dry_run:
            commands.append(write_cmd)
        else:
            j = bc_batch.create_job(image='gcr.io/google.com/cloudsdktool/cloud-sdk:310.0.0-alpine',
                                    command=['/bin/bash', '-c', write_cmd],
                                    attributes={'name': 'write_external_inputs'})
            jobs_to_command[j] = write_cmd
            n_jobs_submitted += 1

    for job in batch._jobs:
        if isinstance(job, _job.PythonJob):
            if job._image is None:
                version = sys.version_info
                if version.major != 3 or version.minor not in (6, 7, 8):
                    raise BatchException(
                        f"You must specify 'image' for Python jobs if you are using a Python version other than 3.6, 3.7, or 3.8 (you are using {version})")
                job._image = f'hailgenetics/python-dill:{version.major}.{version.minor}-slim'
            job._compile(local_tmpdir, remote_tmpdir)

        inputs = [x for r in job._inputs for x in copy_input(r)]

        outputs = [x for r in job._internal_outputs for x in copy_internal_output(r)]
        if outputs:
            used_remote_tmpdir = True
        outputs += [x for r in job._external_outputs for x in copy_external_output(r)]

        symlinks = [x for r in job._mentioned for x in symlink_input_resource_group(r)]

        env_vars = {
            **job._env,
            **{r._uid: r._get_path(local_tmpdir) for r in job._mentioned}}

        if job._image is None:
            if verbose:
                print(f"Using image '{default_image}' since no image was specified.")

        make_local_tmpdir = f'mkdir -p {local_tmpdir}/{job._job_id}'

        job_command = [cmd.strip() for cmd in job._command]
        prepared_job_command = (f'{{\n{x}\n}}' for x in job_command)
        cmd = f'''
{bash_flags}
{make_local_tmpdir}
{"; ".join(symlinks)}
{" && ".join(prepared_job_command)}
'''

        if dry_run:
            commands.append(cmd)
            continue

        parents = [job_to_client_job_mapping[j] for j in job._dependencies]

        attributes = copy.deepcopy(job.attributes) if job.attributes else dict()
        if job.name:
            attributes['name'] = job.name

        resources: Dict[str, Any] = {}
        if job._cpu:
            resources['cpu'] = job._cpu
        if job._memory:
            resources['memory'] = job._memory
        if job._storage:
            resources['storage'] = job._storage
        if job._machine_type:
            resources['machine_type'] = job._machine_type
        if job._preemptible is not None:
            resources['preemptible'] = job._preemptible

        image = job._image if job._image else default_image
        image_ref = parse_docker_image_reference(image)
        if not is_google_registry_domain(image_ref.domain) and image_ref.name() not in HAIL_GENETICS_IMAGES:
            warnings.warn(f'Using an image {image} not in GCR. '
                          f'Jobs may fail due to Docker Hub rate limits.')

        j = bc_batch.create_job(image=image,
                                command=[job._shell if job._shell else self._DEFAULT_SHELL, '-c', cmd],
                                parents=parents,
                                attributes=attributes,
                                resources=resources,
                                input_files=inputs if len(inputs) > 0 else None,
                                output_files=outputs if len(outputs) > 0 else None,
                                always_run=job._always_run,
                                timeout=job._timeout,
                                gcsfuse=job._gcsfuse if len(job._gcsfuse) > 0 else None,
                                env=env_vars,
                                requester_pays_project=batch.requester_pays_project,
                                mount_tokens=True)

        n_jobs_submitted += 1

        job_to_client_job_mapping[job] = j
        jobs_to_command[j] = cmd

    if dry_run:
        print("\n\n".join(commands))
        return None

    if delete_scratch_on_exit and used_remote_tmpdir:
        parents = list(jobs_to_command.keys())
        rm_cmd = f'gsutil -m rm -r {remote_tmpdir}'
        cmd = f'''
{bash_flags}
{activate_service_account}
{rm_cmd}
'''
        j = bc_batch.create_job(
            image='gcr.io/google.com/cloudsdktool/cloud-sdk:310.0.0-alpine',
            command=['/bin/bash', '-c', cmd],
            parents=parents,
            attributes={'name': 'remove_tmpdir'},
            always_run=True)
        jobs_to_command[j] = cmd
        n_jobs_submitted += 1

    if verbose:
        print(f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - build_dag_start, 3)} seconds.')

    submit_batch_start = time.time()
    bc_batch = bc_batch.submit(disable_progress_bar=disable_progress_bar)

    jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

    if verbose:
        print(f'Submitted batch {bc_batch.id} with {n_jobs_submitted} jobs in {round(time.time() - submit_batch_start, 3)} seconds:')
        for jid, cmd in jobs_to_command.items():
            print(f'{jid}: {cmd}')
        print('')

    deploy_config = get_deploy_config()
    url = deploy_config.url('batch', f'/batches/{bc_batch.id}')
    print(f'Submitted batch {bc_batch.id}, see {url}')

    if open:
        webbrowser.open(url)

    if wait:
        print(f'Waiting for batch {bc_batch.id}...')
        status = bc_batch.wait()
        print(f'batch {bc_batch.id} complete: {status["state"]}')
    return bc_batch
def _run(self, pipeline, dry_run, verbose, delete_scratch_on_exit, wait=True, open=False,
         batch_submit_args=None):  # pylint: disable-msg=too-many-statements
    build_dag_start = time.time()

    bucket = self._batch_client.bucket
    subdir_name = 'pipeline-{}'.format(uuid.uuid4().hex[:12])

    remote_tmpdir = f'gs://{bucket}/pipeline/{subdir_name}'
    local_tmpdir = f'/io/pipeline/{subdir_name}'

    default_image = 'ubuntu:latest'

    attributes = pipeline.attributes
    if pipeline.name is not None:
        attributes['name'] = pipeline.name

    batch = self._batch_client.create_batch(attributes=attributes)

    n_jobs_submitted = 0
    used_remote_tmpdir = False

    task_to_job_mapping = {}
    jobs_to_command = {}
    commands = []

    bash_flags = 'set -e' + ('x' if verbose else '') + '; '

    activate_service_account = 'gcloud -q auth activate-service-account ' \
                               '--key-file=/gsa-key/key.json'

    def copy_input(r):
        if isinstance(r, InputResourceFile):
            return [(r._input_path, r._get_path(local_tmpdir))]
        assert isinstance(r, TaskResourceFile)
        return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))]

    def copy_internal_output(r):
        assert isinstance(r, TaskResourceFile)
        return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))]

    def copy_external_output(r):
        if isinstance(r, InputResourceFile):
            return [(r._input_path, dest) for dest in r._output_paths]
        assert isinstance(r, TaskResourceFile)
        return [(r._get_path(local_tmpdir), dest) for dest in r._output_paths]

    write_external_inputs = [x for r in pipeline._input_resources for x in copy_external_output(r)]
    if write_external_inputs:
        def _cp(src, dst):
            return f'gsutil -m cp -R {src} {dst}'

        write_cmd = bash_flags + activate_service_account + ' && ' + \
            ' && '.join([_cp(*files) for files in write_external_inputs])

        if dry_run:
            commands.append(write_cmd)
        else:
            j = batch.create_job(image='google/cloud-sdk:237.0.0-alpine',
                                 command=['/bin/bash', '-c', write_cmd],
                                 attributes={'name': 'write_external_inputs'})
            jobs_to_command[j] = write_cmd
            n_jobs_submitted += 1

    for task in pipeline._tasks:
        inputs = [x for r in task._inputs for x in copy_input(r)]

        outputs = [x for r in task._internal_outputs for x in copy_internal_output(r)]
        if outputs:
            used_remote_tmpdir = True
        outputs += [x for r in task._external_outputs for x in copy_external_output(r)]

        resource_defs = [r._declare(directory=local_tmpdir) for r in task._mentioned]

        if task._image is None:
            if verbose:
                print(f"Using image '{default_image}' since no image was specified.")

        make_local_tmpdir = f'mkdir -p {local_tmpdir}/{task._uid}/; '
        defs = '; '.join(resource_defs) + '; ' if resource_defs else ''
        task_command = [cmd.strip() for cmd in task._command]

        cmd = bash_flags + make_local_tmpdir + defs + " && ".join(task_command)

        if dry_run:
            commands.append(cmd)
            continue

        parents = [task_to_job_mapping[t] for t in task._dependencies]

        attributes = task.attributes
        if task.name:
            attributes['name'] = task.name

        resources = {}
        if task._cpu:
            resources['cpu'] = task._cpu
        if task._memory:
            resources['memory'] = task._memory

        j = batch.create_job(image=task._image if task._image else default_image,
                             command=['/bin/bash', '-c', cmd],
                             parents=parents,
                             attributes=attributes,
                             resources=resources,
                             input_files=inputs if len(inputs) > 0 else None,
                             output_files=outputs if len(outputs) > 0 else None,
                             pvc_size=task._storage,
                             always_run=task._always_run)

        n_jobs_submitted += 1

        task_to_job_mapping[task] = j
        jobs_to_command[j] = cmd

    if dry_run:
        print("\n\n".join(commands))
        return None

    if delete_scratch_on_exit and used_remote_tmpdir:
        parents = list(jobs_to_command.keys())
        rm_cmd = f'gsutil -m rm -r {remote_tmpdir}'
        cmd = bash_flags + f'{activate_service_account} && {rm_cmd}'
        j = batch.create_job(image='google/cloud-sdk:237.0.0-alpine',
                             command=['/bin/bash', '-c', cmd],
                             parents=parents,
                             attributes={'name': 'remove_tmpdir'},
                             always_run=True)
        jobs_to_command[j] = cmd
        n_jobs_submitted += 1

    if verbose:
        print(f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - build_dag_start, 3)} seconds.')

    submit_batch_start = time.time()
    batch = batch.submit(**(batch_submit_args or {}))

    jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

    if verbose:
        print(f'Submitted batch {batch.id} with {n_jobs_submitted} jobs in {round(time.time() - submit_batch_start, 3)} seconds:')
        for jid, cmd in jobs_to_command.items():
            print(f'{jid}: {cmd}')
        print('')

    deploy_config = get_deploy_config()
    url = deploy_config.url('batch', f'/batches/{batch.id}')
    print(f'Submitted batch {batch.id}, see {url}')

    if open:
        webbrowser.open(url)

    if wait:
        print(f'Waiting for batch {batch.id}...')
        status = batch.wait()
        print(f'Batch {batch.id} complete: {status["state"]}')
    return batch
def _run(self,
         batch,
         dry_run,
         verbose,
         delete_scratch_on_exit,
         wait=True,
         open=False,
         disable_progress_bar=False):  # pylint: disable-msg=too-many-statements
    """Execute a batch.

    Warning
    -------
    This method should not be called directly. Instead, use :meth:`.Batch.run`
    and pass :class:`.ServiceBackend` specific arguments as key-word arguments.

    Parameters
    ----------
    batch: :class:`.Batch`
        Batch to execute.
    dry_run: :obj:`bool`
        If `True`, don't execute code.
    verbose: :obj:`bool`
        If `True`, print debugging output.
    delete_scratch_on_exit: :obj:`bool`
        If `True`, delete temporary directories with intermediate files.
    wait: :obj:`bool`, optional
        If `True`, wait for the batch to finish executing before returning.
    open: :obj:`bool`, optional
        If `True`, open the UI page for the batch.
    disable_progress_bar: :obj:`bool`, optional
        If `True`, disable the progress bar.
    """
    build_dag_start = time.time()

    bucket = self._batch_client.bucket
    subdir_name = 'batch-{}'.format(uuid.uuid4().hex[:12])

    remote_tmpdir = f'gs://{bucket}/batch/{subdir_name}'
    local_tmpdir = f'/io/batch/{subdir_name}'

    default_image = 'ubuntu:latest'

    attributes = copy.deepcopy(batch.attributes)
    if batch.name is not None:
        attributes['name'] = batch.name

    bc_batch = self._batch_client.create_batch(attributes=attributes)

    n_jobs_submitted = 0
    used_remote_tmpdir = False

    job_to_client_job_mapping = {}
    jobs_to_command = {}
    commands = []

    bash_flags = 'set -e' + ('x' if verbose else '') + '; '

    activate_service_account = 'gcloud -q auth activate-service-account ' \
                               '--key-file=/gsa-key/key.json'

    def copy_input(r):
        if isinstance(r, InputResourceFile):
            return [(r._input_path, r._get_path(local_tmpdir))]
        assert isinstance(r, JobResourceFile)
        return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))]

    def copy_internal_output(r):
        assert isinstance(r, JobResourceFile)
        return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))]

    def copy_external_output(r):
        if isinstance(r, InputResourceFile):
            return [(r._input_path, dest) for dest in r._output_paths]
        assert isinstance(r, JobResourceFile)
        return [(r._get_path(local_tmpdir), dest) for dest in r._output_paths]

    write_external_inputs = [x for r in batch._input_resources for x in copy_external_output(r)]
    if write_external_inputs:
        def _cp(src, dst):
            return f'gsutil -m cp -R {src} {dst}'

        write_cmd = bash_flags + activate_service_account + ' && ' + \
            ' && '.join([_cp(*files) for files in write_external_inputs])

        if dry_run:
            commands.append(write_cmd)
        else:
            j = bc_batch.create_job(image='google/cloud-sdk:237.0.0-alpine',
                                    command=['/bin/bash', '-c', write_cmd],
                                    attributes={'name': 'write_external_inputs'})
            jobs_to_command[j] = write_cmd
            n_jobs_submitted += 1

    for job in batch._jobs:
        inputs = [x for r in job._inputs for x in copy_input(r)]

        outputs = [x for r in job._internal_outputs for x in copy_internal_output(r)]
        if outputs:
            used_remote_tmpdir = True
        outputs += [x for r in job._external_outputs for x in copy_external_output(r)]

        env_vars = {r._uid: r._get_path(local_tmpdir) for r in job._mentioned}

        if job._image is None:
            if verbose:
                print(f"Using image '{default_image}' since no image was specified.")

        make_local_tmpdir = f'mkdir -p {local_tmpdir}/{job._uid}/; '
        job_command = [cmd.strip() for cmd in job._command]

        cmd = bash_flags + make_local_tmpdir + " && ".join(job_command)

        if dry_run:
            commands.append(cmd)
            continue

        parents = [job_to_client_job_mapping[j] for j in job._dependencies]

        attributes = copy.deepcopy(job.attributes)
        if job.name:
            attributes['name'] = job.name

        resources = {}
        if job._cpu:
            resources['cpu'] = job._cpu
        if job._memory:
            resources['memory'] = job._memory

        j = bc_batch.create_job(image=job._image if job._image else default_image,
                                command=['/bin/bash', '-c', cmd],
                                parents=parents,
                                attributes=attributes,
                                resources=resources,
                                input_files=inputs if len(inputs) > 0 else None,
                                output_files=outputs if len(outputs) > 0 else None,
                                pvc_size=job._storage,
                                always_run=job._always_run,
                                timeout=job._timeout,
                                env=env_vars)

        n_jobs_submitted += 1

        job_to_client_job_mapping[job] = j
        jobs_to_command[j] = cmd

    if dry_run:
        print("\n\n".join(commands))
        return None

    if delete_scratch_on_exit and used_remote_tmpdir:
        parents = list(jobs_to_command.keys())
        rm_cmd = f'gsutil -m rm -r {remote_tmpdir}'
        cmd = bash_flags + f'{activate_service_account} && {rm_cmd}'
        j = bc_batch.create_job(image='google/cloud-sdk:237.0.0-alpine',
                                command=['/bin/bash', '-c', cmd],
                                parents=parents,
                                attributes={'name': 'remove_tmpdir'},
                                always_run=True)
        jobs_to_command[j] = cmd
        n_jobs_submitted += 1

    if verbose:
        print(f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - build_dag_start, 3)} seconds.')

    submit_batch_start = time.time()
    bc_batch = bc_batch.submit(disable_progress_bar=disable_progress_bar)

    jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

    if verbose:
        print(f'Submitted batch {bc_batch.id} with {n_jobs_submitted} jobs in {round(time.time() - submit_batch_start, 3)} seconds:')
        for jid, cmd in jobs_to_command.items():
            print(f'{jid}: {cmd}')
        print('')

    deploy_config = get_deploy_config()
    url = deploy_config.url('batch', f'/batches/{bc_batch.id}')
    print(f'Submitted batch {bc_batch.id}, see {url}')

    if open:
        webbrowser.open(url)

    if wait:
        print(f'Waiting for batch {bc_batch.id}...')
        status = bc_batch.wait()
        print(f'batch {bc_batch.id} complete: {status["state"]}')
    return bc_batch
def _run(self,
         batch: 'Batch',
         dry_run: bool,
         verbose: bool,
         delete_scratch_on_exit: bool,
         wait: bool = True,
         open: bool = False,
         disable_progress_bar: bool = False,
         callback: Optional[str] = None,
         **backend_kwargs):  # pylint: disable-msg=too-many-statements
    """Execute a batch.

    Warning
    -------
    This method should not be called directly. Instead, use :meth:`.Batch.run`
    and pass :class:`.ServiceBackend` specific arguments as key-word arguments.

    Parameters
    ----------
    batch: :class:`.Batch`
        Batch to execute.
    dry_run: :obj:`bool`
        If `True`, don't execute code.
    verbose: :obj:`bool`
        If `True`, print debugging output.
    delete_scratch_on_exit: :obj:`bool`
        If `True`, delete temporary directories with intermediate files.
    wait: :obj:`bool`, optional
        If `True`, wait for the batch to finish executing before returning.
    open: :obj:`bool`, optional
        If `True`, open the UI page for the batch.
    disable_progress_bar: :obj:`bool`, optional
        If `True`, disable the progress bar.
    callback: :obj:`str`, optional
        If not `None`, a URL that will receive at most one POST request after the
        entire batch completes.
    """
    if backend_kwargs:
        raise ValueError(f'ServiceBackend does not support any of these keywords: {backend_kwargs}')

    build_dag_start = time.time()

    token = uuid.uuid4().hex[:6]
    remote_tmpdir = f'gs://{self._bucket_name}/batch/{token}'
    local_tmpdir = f'/io/batch/{token}'

    default_image = 'ubuntu:latest'

    attributes = copy.deepcopy(batch.attributes)
    if batch.name is not None:
        attributes['name'] = batch.name

    bc_batch = self._batch_client.create_batch(attributes=attributes, callback=callback)

    n_jobs_submitted = 0
    used_remote_tmpdir = False

    job_to_client_job_mapping: Dict['Job', bc.Job] = {}
    jobs_to_command = {}
    commands = []

    bash_flags = 'set -e' + ('x' if verbose else '')

    activate_service_account = 'gcloud -q auth activate-service-account ' \
                               '--key-file=/gsa-key/key.json'

    def copy_input(r):
        if isinstance(r, InputResourceFile):
            return [(r._input_path, r._get_path(local_tmpdir))]
        assert isinstance(r, JobResourceFile)
        return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))]

    def copy_internal_output(r):
        assert isinstance(r, JobResourceFile)
        return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))]

    def copy_external_output(r):
        if isinstance(r, InputResourceFile):
            return [(r._input_path, dest) for dest in r._output_paths]
        assert isinstance(r, JobResourceFile)
        return [(r._get_path(local_tmpdir), dest) for dest in r._output_paths]

    def symlink_input_resource_group(r):
        symlinks = []
        if isinstance(r, ResourceGroup) and r._source is None:
            for name, irf in r._resources.items():
                src = irf._get_path(local_tmpdir)
                dest = f'{r._get_path(local_tmpdir)}.{name}'
                symlinks.append(f'ln -sf {shq(src)} {shq(dest)}')
        return symlinks

    write_external_inputs = [x for r in batch._input_resources for x in copy_external_output(r)]
    if write_external_inputs:
        def _cp(src, dst):
            return f'gsutil -m cp -R {shq(src)} {shq(dst)}'

        write_cmd = f'''
{bash_flags}
{activate_service_account}
{' && '.join([_cp(*files) for files in write_external_inputs])}
'''

        if dry_run:
            commands.append(write_cmd)
        else:
            j = bc_batch.create_job(image='google/cloud-sdk:237.0.0-alpine',
                                    command=['/bin/bash', '-c', write_cmd],
                                    attributes={'name': 'write_external_inputs'})
            jobs_to_command[j] = write_cmd
            n_jobs_submitted += 1

    for job in batch._jobs:
        inputs = [x for r in job._inputs for x in copy_input(r)]

        outputs = [x for r in job._internal_outputs for x in copy_internal_output(r)]
        if outputs:
            used_remote_tmpdir = True
        outputs += [x for r in job._external_outputs for x in copy_external_output(r)]

        symlinks = [x for r in job._mentioned for x in symlink_input_resource_group(r)]

        env_vars = {
            **job._env,
            **{r._uid: r._get_path(local_tmpdir) for r in job._mentioned}}

        if job._image is None:
            if verbose:
                print(f"Using image '{default_image}' since no image was specified.")

        make_local_tmpdir = f'mkdir -p {local_tmpdir}/{job._job_id}'

        job_command = [cmd.strip() for cmd in job._command]
        prepared_job_command = (f'{{\n{x}\n}}' for x in job_command)
        cmd = f'''
{bash_flags}
{make_local_tmpdir}
{"; ".join(symlinks)}
{" && ".join(prepared_job_command)}
'''

        if dry_run:
            commands.append(cmd)
            continue

        parents = [job_to_client_job_mapping[j] for j in job._dependencies]

        attributes = copy.deepcopy(job.attributes) if job.attributes else dict()
        if job.name:
            attributes['name'] = job.name

        resources = {}
        if job._cpu:
            resources['cpu'] = job._cpu
        if job._memory:
            resources['memory'] = job._memory
        if job._storage:
            resources['storage'] = job._storage

        j = bc_batch.create_job(image=job._image if job._image else default_image,
                                command=['/bin/bash', '-c', cmd],
                                parents=parents,
                                attributes=attributes,
                                resources=resources,
                                input_files=inputs if len(inputs) > 0 else None,
                                output_files=outputs if len(outputs) > 0 else None,
                                always_run=job._always_run,
                                timeout=job._timeout,
                                gcsfuse=job._gcsfuse if len(job._gcsfuse) > 0 else None,
                                env=env_vars,
                                requester_pays_project=batch.requester_pays_project)

        n_jobs_submitted += 1

        job_to_client_job_mapping[job] = j
        jobs_to_command[j] = cmd

    if dry_run:
        print("\n\n".join(commands))
        return None

    if delete_scratch_on_exit and used_remote_tmpdir:
        parents = list(jobs_to_command.keys())
        rm_cmd = f'gsutil -m rm -r {remote_tmpdir}'
        cmd = f'''
{bash_flags}
{activate_service_account}
{rm_cmd}
'''
        j = bc_batch.create_job(image='google/cloud-sdk:237.0.0-alpine',
                                command=['/bin/bash', '-c', cmd],
                                parents=parents,
                                attributes={'name': 'remove_tmpdir'},
                                always_run=True)
        jobs_to_command[j] = cmd
        n_jobs_submitted += 1

    if verbose:
        print(f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - build_dag_start, 3)} seconds.')

    submit_batch_start = time.time()
    bc_batch = bc_batch.submit(disable_progress_bar=disable_progress_bar)

    jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

    if verbose:
        print(f'Submitted batch {bc_batch.id} with {n_jobs_submitted} jobs in {round(time.time() - submit_batch_start, 3)} seconds:')
        for jid, cmd in jobs_to_command.items():
            print(f'{jid}: {cmd}')
        print('')

    deploy_config = get_deploy_config()
    url = deploy_config.url('batch', f'/batches/{bc_batch.id}')
    print(f'Submitted batch {bc_batch.id}, see {url}')

    if open:
        webbrowser.open(url)

    if wait:
        print(f'Waiting for batch {bc_batch.id}...')
        status = bc_batch.wait()
        print(f'batch {bc_batch.id} complete: {status["state"]}')
    return bc_batch
def main(args):  # pylint: disable=unused-argument
    deploy_config = get_deploy_config()
    print(f' location: {deploy_config.location()}')
    print(f' default_namespace: {deploy_config._default_namespace}')
    print(f' domain: {deploy_config._domain}')
async def _async_run(self,
                     batch: 'batch.Batch',
                     dry_run: bool,
                     verbose: bool,
                     delete_scratch_on_exit: bool,
                     wait: bool = True,
                     open: bool = False,
                     disable_progress_bar: bool = False,
                     callback: Optional[str] = None,
                     token: Optional[str] = None,
                     **backend_kwargs):  # pylint: disable-msg=too-many-statements
    if backend_kwargs:
        raise ValueError(f'ServiceBackend does not support any of these keywords: {backend_kwargs}')

    build_dag_start = time.time()

    uid = uuid.uuid4().hex[:6]
    batch_remote_tmpdir = f'{self.remote_tmpdir}{uid}'
    local_tmpdir = f'/io/batch/{uid}'

    default_image = 'ubuntu:20.04'

    attributes = copy.deepcopy(batch.attributes)
    if batch.name is not None:
        attributes['name'] = batch.name

    bc_batch = self._batch_client.create_batch(attributes=attributes,
                                               callback=callback,
                                               token=token,
                                               cancel_after_n_failures=batch._cancel_after_n_failures)

    n_jobs_submitted = 0
    used_remote_tmpdir = False

    job_to_client_job_mapping: Dict[_job.Job, bc.Job] = {}
    jobs_to_command = {}
    commands = []

    bash_flags = 'set -e' + ('x' if verbose else '')

    def copy_input(r):
        if isinstance(r, resource.InputResourceFile):
            return [(r._input_path, r._get_path(local_tmpdir))]
        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return [(r._get_path(batch_remote_tmpdir), r._get_path(local_tmpdir))]

    def copy_internal_output(r):
        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return [(r._get_path(local_tmpdir), r._get_path(batch_remote_tmpdir))]

    def copy_external_output(r):
        if isinstance(r, resource.InputResourceFile):
            return [(r._input_path, dest) for dest in r._output_paths]
        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return [(r._get_path(local_tmpdir), dest) for dest in r._output_paths]

    def symlink_input_resource_group(r):
        symlinks = []
        if isinstance(r, resource.ResourceGroup) and r._source is None:
            for name, irf in r._resources.items():
                src = irf._get_path(local_tmpdir)
                dest = f'{r._get_path(local_tmpdir)}.{name}'
                symlinks.append(f'ln -sf {shq(src)} {shq(dest)}')
        return symlinks

    write_external_inputs = [x for r in batch._input_resources for x in copy_external_output(r)]
    if write_external_inputs:
        transfers_bytes = orjson.dumps([
            {"from": src, "to": dest} for src, dest in write_external_inputs])
        transfers = transfers_bytes.decode('utf-8')
        write_cmd = ['python3', '-m', 'hailtop.aiotools.copy', 'null', transfers]
        if dry_run:
            commands.append(' '.join(shq(x) for x in write_cmd))
        else:
            j = bc_batch.create_job(image=HAIL_GENETICS_HAIL_IMAGE,
                                    command=write_cmd,
                                    attributes={'name': 'write_external_inputs'})
            jobs_to_command[j] = ' '.join(shq(x) for x in write_cmd)
            n_jobs_submitted += 1

    pyjobs = [j for j in batch._jobs if isinstance(j, _job.PythonJob)]
    for job in pyjobs:
        if job._image is None:
            version = sys.version_info
            if version.major != 3 or version.minor not in (6, 7, 8):
                raise BatchException(
                    f"You must specify 'image' for Python jobs if you are using a Python version other than 3.6, 3.7, or 3.8 (you are using {version})")
            job._image = f'hailgenetics/python-dill:{version.major}.{version.minor}-slim'

    with tqdm(total=len(batch._jobs), desc='upload code', disable=disable_progress_bar) as pbar:
        async def compile_job(job):
            used_remote_tmpdir = await job._compile(local_tmpdir, batch_remote_tmpdir, dry_run=dry_run)
            pbar.update(1)
            return used_remote_tmpdir

        used_remote_tmpdir_results = await bounded_gather(
            *[functools.partial(compile_job, j) for j in batch._jobs],
            parallelism=150)
        used_remote_tmpdir |= any(used_remote_tmpdir_results)

    for job in tqdm(batch._jobs, desc='create job objects', disable=disable_progress_bar):
        inputs = [x for r in job._inputs for x in copy_input(r)]

        outputs = [x for r in job._internal_outputs for x in copy_internal_output(r)]
        if outputs:
            used_remote_tmpdir = True
        outputs += [x for r in job._external_outputs for x in copy_external_output(r)]

        symlinks = [x for r in job._mentioned for x in symlink_input_resource_group(r)]

        if job._image is None:
            if verbose:
                print(f"Using image '{default_image}' since no image was specified.")

        make_local_tmpdir = f'mkdir -p {local_tmpdir}/{job._dirname}'

        job_command = [cmd.strip() for cmd in job._wrapper_code]
        prepared_job_command = (f'{{\n{x}\n}}' for x in job_command)
        cmd = f'''
{bash_flags}
{make_local_tmpdir}
{"; ".join(symlinks)}
{" && ".join(prepared_job_command)}
'''

        user_code = '\n\n'.join(job._user_code) if job._user_code else None

        if dry_run:
            formatted_command = f'''
================================================================================
# Job {job._job_id} {f": {job.name}" if job.name else ''}

--------------------------------------------------------------------------------
## USER CODE
--------------------------------------------------------------------------------
{user_code}

--------------------------------------------------------------------------------
## COMMAND
--------------------------------------------------------------------------------
{cmd}
================================================================================
'''
            commands.append(formatted_command)
            continue

        parents = [job_to_client_job_mapping[j] for j in job._dependencies]

        attributes = copy.deepcopy(job.attributes) if job.attributes else {}
        if job.name:
            attributes['name'] = job.name

        resources: Dict[str, Any] = {}
        if job._cpu:
            resources['cpu'] = job._cpu
        if job._memory:
            resources['memory'] = job._memory
        if job._storage:
            resources['storage'] = job._storage
        if job._machine_type:
            resources['machine_type'] = job._machine_type
        if job._preemptible is not None:
            resources['preemptible'] = job._preemptible

        image = job._image if job._image else default_image
        image_ref = parse_docker_image_reference(image)
        if image_ref.hosted_in('dockerhub') and image_ref.name() not in HAIL_GENETICS_IMAGES:
            warnings.warn(f'Using an image {image} from Docker Hub. '
                          f'Jobs may fail due to Docker Hub rate limits.')

        env = {**job._env, 'BATCH_TMPDIR': local_tmpdir}

        j = bc_batch.create_job(image=image,
                                command=[job._shell if job._shell else DEFAULT_SHELL, '-c', cmd],
                                parents=parents,
                                attributes=attributes,
                                resources=resources,
                                input_files=inputs if len(inputs) > 0 else None,
                                output_files=outputs if len(outputs) > 0 else None,
                                always_run=job._always_run,
                                timeout=job._timeout,
                                cloudfuse=job._cloudfuse if len(job._cloudfuse) > 0 else None,
                                env=env,
                                requester_pays_project=batch.requester_pays_project,
                                mount_tokens=True,
                                user_code=user_code)

        n_jobs_submitted += 1

        job_to_client_job_mapping[job] = j
        jobs_to_command[j] = cmd

    if dry_run:
        print("\n\n".join(commands))
        return None

    if delete_scratch_on_exit and used_remote_tmpdir:
        parents = list(jobs_to_command.keys())
        j = bc_batch.create_job(image=HAIL_GENETICS_HAIL_IMAGE,
                                command=['python3', '-m', 'hailtop.aiotools.delete', batch_remote_tmpdir],
                                parents=parents,
                                attributes={'name': 'remove_tmpdir'},
                                always_run=True)
        jobs_to_command[j] = cmd
        n_jobs_submitted += 1

    if verbose:
        print(f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - build_dag_start, 3)} seconds.')

    submit_batch_start = time.time()
    batch_handle = bc_batch.submit(disable_progress_bar=disable_progress_bar)

    jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

    if verbose:
        print(f'Submitted batch {batch_handle.id} with {n_jobs_submitted} jobs in {round(time.time() - submit_batch_start, 3)} seconds:')
        for jid, cmd in jobs_to_command.items():
            print(f'{jid}: {cmd}')
        print('')

    deploy_config = get_deploy_config()
    url = deploy_config.url('batch', f'/batches/{batch_handle.id}')
    print(f'Submitted batch {batch_handle.id}, see {url}')

    if open:
        webbrowser.open(url)

    if wait:
        print(f'Waiting for batch {batch_handle.id}...')
        status = batch_handle.wait()
        print(f'batch {batch_handle.id} complete: {status["state"]}')
    return batch_handle
import concurrent.futures

import aiohttp
import gidgethub
import zulip

from hailtop.config import get_deploy_config
from hailtop.utils import check_shell, check_shell_output

from .constants import GITHUB_CLONE_URL, AUTHORIZED_USERS
from .build import BuildConfiguration, Code
from .globals import is_test_deployment

repos_lock = asyncio.Lock()

log = logging.getLogger('ci')

CALLBACK_URL = get_deploy_config().url('ci', '/api/v1alpha/batch_callback')

zulip_client = zulip.Client(config_file="/zulip-config/.zuliprc")


class Repo:
    def __init__(self, owner, name):
        assert isinstance(owner, str)
        assert isinstance(name, str)
        self.owner = owner
        self.name = name
        self.url = f'{GITHUB_CLONE_URL}{owner}/{name}.git'

    def __eq__(self, other):
        return self.owner == other.owner and self.name == other.name