async def auth_aws(hvac, role):
    """ Get an AWS token and save it to the system.
        boto is not async-friendly, but this call must be.
        Role is one of pct, robot, developer. """
    try:
        # hvac.write() is blocking, so run it in the default executor
        future = asyncio.get_event_loop().run_in_executor(
            None, lambda: hvac.write(f'aws/sts/{role}', ttl='36h'))
        token = await asyncio.wait_for(future, 10)
    except Exception as e:
        log.error(f"Failed to authenticate hvac: {e}")
        return False

    creds = f"[default]\naws_access_key_id = {token['data']['access_key']}\naws_secret_access_key = " + \
            f"{token['data']['secret_key']}\naws_security_token = {token['data']['security_token']}\n"
    async with aiofiles.open('/app/.aws/credentials', "w") as f:
        await f.write(creds)
    async with aiofiles.open('/app/.aws/config', "w") as f:
        await f.write('[default]\nregion = us-east-2\noutput = json\n')
    log.verbose(f'Authenticated with aws as {role}')
    return True
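# Hedged usage sketch (illustrative, not part of the source): auth_aws() expects
# an already-authenticated hvac.Client and one of the roles named in its
# docstring. The Vault URL, token placeholder, and role below are assumptions.
async def _example_auth_aws():
    import hvac as hvac_lib  # the python hvac library providing Client.write()
    client = hvac_lib.Client(url='http://127.0.0.1:8200', token='<vault-token>')
    ok = await auth_aws(client, 'developer')
    log.verbose(f'AWS auth example result: {ok}')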
async def auth_docker():
    """ Authenticate the HOST's docker daemon """
    boto_client = boto3.client('ecr')
    try:
        # get_authorization_token() is blocking, so run it in the default executor
        future = asyncio.get_event_loop().run_in_executor(
            None, boto_client.get_authorization_token)
        res = await asyncio.wait_for(future, 10)
    except Exception as e:
        return log.error(f"Unable to authenticate with AWS ECR: {e}")

    # Also has an expiresAt key
    token = res['authorizationData'][0]['authorizationToken'].encode('utf-8')
    username, password = base64.b64decode(token).decode('utf-8').split(':')

    endpoint = '778747430246.dkr.ecr.us-east-2.amazonaws.com'
    command = f'docker login {endpoint} --username AWS --password {password}'
    ret = await asyncio_utils.stream_subprocess(command, log.verbose, log.verbose, timeout=20)
    if ret != 0:
        return log.error('Unable to authenticate docker')
    log.verbose('Authenticated with docker')
    return True
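# Illustrative sketch (not from the source) of the ECR token handling above:
# get_authorization_token() returns base64("AWS:<password>"), which is why the
# code splits on ':' after decoding. The values here are fabricated for clarity.
def _example_decode_ecr_token():
    raw = base64.b64encode(b'AWS:example-ecr-password').decode('utf-8')   # shape of the returned token
    username, password = base64.b64decode(raw).decode('utf-8').split(':')
    assert username == 'AWS' and password == 'example-ecr-password'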
async def ensure_auth(hvac, role):
    """ Check the current authentication state of the system, including AWS
        and Docker. SSH is not included here, since it's developer-only. """
    if role == 'snowbot':
        role = 'robot'

    last_aws_auth = settings.hatch.aws_auth_time
    hours = (time.time() - last_aws_auth) / 3600
    log.verbose(f'Hours since last AWS auth: {hours}')
    if hours > properties.aws_auth_period or True:  # 'or True' forces re-auth on every call
        if not await auth_aws(hvac, role):
            return
        settings.reload()
        settings.hatch.aws_auth_time = time.time()
        settings.save()

    # Docker
    last_docker_auth = settings.hatch.docker_auth_time
    hours = (time.time() - last_docker_auth) / 3600
    log.verbose(f'Hours since last Docker auth: {hours}')
    if hours > properties.docker_auth_period or True:  # 'or True' forces re-auth on every call
        if not await auth_docker():
            return
        settings.reload()
        settings.hatch.docker_auth_time = time.time()
        settings.save()

    # SSH keys
    if not await ssh_key_valid():
        await sign_ssh_key(hvac)
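# Hedged sketch (assumption, not from the source) of how ensure_auth() might be
# driven from a long-running task; the hourly interval and role are illustrative.
async def _example_auth_loop(hvac_client):
    while True:
        await ensure_auth(hvac_client, 'developer')
        await asyncio.sleep(3600)  # re-check roughly once an hour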
def Submit(config, jobdesc):
    """
    Submits a job to the SLURM queue specified in arc.conf. This method executes the required
    RunTimeEnvironment scripts and assembles the bash job script. The job script is
    written to file and submitted with ``sbatch``.

    :param str config: path to arc.conf
    :param jobdesc: job description object
    :type jobdesc: :py:class:`arc.JobDescription`
    :return: local job ID if successfully submitted, else ``None``
    :rtype: :py:obj:`str`
    """

    configure(config, set_slurm)

    validate_attributes(jobdesc)
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)

    # Run RTE stage0
    debug('----- starting slurmSubmitter.py -----', 'slurm.Submit')
    RTE_stage0(jobdesc, 'SLURM', SBATCH_ACCOUNT = 'OtherAttributes.SBATCH_ACCOUNT')

    set_grid_global_jobid(jobdesc)

    # Create script file and write job script
    jobscript = get_job_script(jobdesc)
    script_file = write_script_file(jobscript)

    debug('Created file %s' % script_file, 'slurm.Submit')
    debug('SLURM jobname: %s' % jobdesc.Identification.JobName, 'slurm.Submit')
    debug('SLURM job script built', 'slurm.Submit')

    debug('----------------- BEGIN job script -----', 'slurm.Submit')
    emptylines = 0
    for line in jobscript.split('\n'):
        if not line:
            emptylines += 1
        else:
            debug(emptylines*'\n' + line.replace("%", "%%"), 'slurm.Submit')
            emptylines = 0
    if emptylines > 1:
        debug((emptylines-1)*'\n', 'slurm.Submit')
    debug('----------------- END job script -----', 'slurm.Submit')

    if 'ONLY_WRITE_JOBSCRIPT' in os.environ and os.environ['ONLY_WRITE_JOBSCRIPT'] == 'yes':
        return "-1"

    #######################################
    # Submit the job
    ######################################

    execute = execute_local if not Config.remote_host else execute_remote
    directory = jobdesc.OtherAttributes['joboption;directory']

    debug('Session directory: %s' % directory, 'slurm.Submit')

    SLURM_TRIES = 0
    handle = None
    while SLURM_TRIES < 10:
        args = '%s/sbatch %s' % (Config.slurm_bin_path, script_file)
        verbose('Executing \'%s\' on %s' %
                (args, Config.remote_host if Config.remote_host else 'localhost'), 'slurm.Submit')
        handle = execute(args)
        if handle.returncode == 0:
            break
        if handle.returncode == 198 or wait_for_queue(handle):
            debug('Waiting for queue to decrease', 'slurm.Submit')
            time.sleep(60)
            SLURM_TRIES += 1
            continue
        break  # Other error than full queue

    if handle.returncode == 0:
        # TODO: Test what happens when the jobqueue is full or when the slurm
        # ctld is not responding. SLURM 1.x and 2.2.x outputs the jobid into
        # STDERR and STDOUT respectively. Concat them, and let sed sort it out.
        # From the exit code we know that the job was submitted, so this
        # is safe. Ulf Tigerstedt <*****@*****.**> 1.5.2011
        localid = get_job_id(handle)
        if localid:
            debug('Job submitted successfully!', 'slurm.Submit')
            debug('Local job id: ' + localid, 'slurm.Submit')
            debug('----- exiting slurmSubmitter.py -----', 'slurm.Submit')
            return localid

    debug('job *NOT* submitted successfully!', 'slurm.Submit')
    debug('got error code from sbatch: %d !' % handle.returncode, 'slurm.Submit')
    debug('Output is:\n' + ''.join(handle.stdout), 'slurm.Submit')
    debug('Error output is:\n' + ''.join(handle.stderr), 'slurm.Submit')
    debug('----- exiting slurmSubmitter.py -----', 'slurm.Submit')
def Submit(config, jobdesc):
    """
    Submits a job to the SLURM queue specified in arc.conf. This method executes the required
    RunTimeEnvironment scripts and assembles the bash job script. The job script is
    written to file and submitted with ``sbatch``.

    :param str config: path to arc.conf
    :param jobdesc: job description object
    :type jobdesc: :py:class:`arc.JobDescription`
    :return: local job ID if successfully submitted, else ``None``
    :rtype: :py:obj:`str`
    """

    configure(config, set_slurm)

    validate_attributes(jobdesc)
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)

    # Run RTE stage0
    debug("----- starting slurmSubmitter.py -----", "slurm.Submit")
    RTE_stage0(jobdesc, "SLURM", SBATCH_ACCOUNT="OtherAttributes.SBATCH_ACCOUNT")

    # Create script file and write job script
    jobscript = get_job_script(jobdesc)
    script_file = write_script_file(jobscript)

    debug("SLURM jobname: %s" % jobdesc.Identification.JobName, "slurm.Submit")
    debug("SLURM job script built", "slurm.Submit")

    debug("----------------- BEGIN job script -----", "slurm.Submit")
    for line in jobscript.split("\n"):
        debug(line, "slurm.Submit")
    debug("----------------- END job script -----", "slurm.Submit")

    if "ONLY_WRITE_JOBSCRIPT" in os.environ and os.environ["ONLY_WRITE_JOBSCRIPT"] == "yes":
        return

    #######################################
    # Submit the job
    ######################################

    execute = execute_local if not Config.remote_host else execute_remote
    directory = jobdesc.OtherAttributes["joboption;directory"]

    debug("Session directory: %s" % directory, "slurm.Submit")

    SLURM_TRIES = 0
    handle = None
    while SLURM_TRIES < 10:
        args = "%s/sbatch %s" % (Config.slurm_bin_path, script_file)
        verbose(
            "Executing '%s' on %s" % (args, Config.remote_host if Config.remote_host else "localhost"),
            "slurm.Submit"
        )
        handle = execute(args)
        if handle.returncode == 0:
            break
        if handle.returncode == 198 or wait_for_queue(handle):
            debug("Waiting for queue to decrease", "slurm.Submit")
            time.sleep(60)
            SLURM_TRIES += 1
            continue
        break  # Other error than full queue

    if handle.returncode == 0:
        # TODO: Test what happens when the jobqueue is full or when the slurm
        # ctld is not responding. SLURM 1.x and 2.2.x outputs the jobid into
        # STDERR and STDOUT respectively. Concat them, and let sed sort it out.
        # From the exit code we know that the job was submitted, so this
        # is safe. Ulf Tigerstedt <*****@*****.**> 1.5.2011
        localid = get_job_id(handle)
        if localid:
            debug("Job submitted successfully!", "slurm.Submit")
            debug("Local job id: " + localid, "slurm.Submit")
            debug("----- exiting slurmSubmitter.py -----", "slurm.Submit")
            return localid

    debug("job *NOT* submitted successfully!", "slurm.Submit")
    debug("got error code from sbatch: %d !" % handle.returncode, "slurm.Submit")
    debug("Output is:\n" + "".join(handle.stdout), "slurm.Submit")
    debug("Error output is:\n" + "".join(handle.stderr), "slurm.Submit")
    debug("----- exiting slurmSubmitter.py -----", "slurm.Submit")
def Submit(config, jobdesc):
    """
    Submits a job to the LSF queue specified in arc.conf. This method executes the required
    RunTimeEnvironment scripts and assembles the bash job script. The job script is
    written to file and submitted with ``bsub``.

    :param str config: path to arc.conf
    :param jobdesc: job description object
    :type jobdesc: :py:class:`arc.JobDescription`
    :return: local job ID if successfully submitted, else ``None``
    :rtype: :py:obj:`str`
    """

    configure(config, set_lsf)

    validate_attributes(jobdesc)
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)

    # Run RTE stage0
    debug('----- starting lsfSubmitter.py -----', 'lsf.Submit')
    RTE_stage0(jobdesc, 'LSF')

    # Create script file and write job script
    jobscript = get_job_script(jobdesc)
    script_file = write_script_file(jobscript)

    debug('LSF jobname: %s' % jobdesc.Identification.JobName, 'lsf.Submit')
    debug('LSF job script built', 'lsf.Submit')

    debug('----------------- BEGIN job script -----', 'lsf.Submit')
    for line in jobscript.split('\n'):
        debug(line, 'lsf.Submit')
    debug('----------------- END job script -----', 'lsf.Submit')

    if 'ONLY_WRITE_JOBSCRIPT' in os.environ and os.environ['ONLY_WRITE_JOBSCRIPT'] == 'yes':
        return False

    #######################################
    # Submit the job
    ######################################

    execute = execute_local if not Config.remote_host else execute_remote
    directory = jobdesc.OtherAttributes['joboption;directory']

    debug('Session directory: %s' % directory, 'lsf.Submit')

    LSF_TRIES = 0
    args = '%s %s/bsub < %s' % (Config.lsf_setup, Config.lsf_bin_path, script_file)
    verbose('executing \'%s\' on %s' %
            (args, Config.remote_host if Config.remote_host else 'localhost'), 'lsf.Submit')
    handle = execute(args)

    if handle.returncode == 0:
        localid = get_job_id(handle)
        if localid:
            debug('Job submitted successfully!', 'lsf.Submit')
            debug('Local job id: ' + localid, 'lsf.Submit')
            debug('----- exiting lsfSubmitter.py -----', 'lsf.Submit')
            return localid

    debug('job *NOT* submitted successfully!', 'lsf.Submit')
    debug('got error code from bsub: %d !' % handle.returncode, 'lsf.Submit')
    debug('Output is:\n' + ''.join(handle.stdout), 'lsf.Submit')
    debug('Error output is:\n' + ''.join(handle.stderr), 'lsf.Submit')
    debug('----- exiting lsfSubmitter.py -----', 'lsf.Submit')
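# Hedged invocation sketch (not from the source) for the SLURM and LSF Submit()
# back-ends above. Construction of the arc.JobDescription via the ARC python
# bindings is elided; the arc.conf path is an assumption.
def _example_submit(jobdesc):
    localid = Submit('/etc/arc.conf', jobdesc)
    if not localid:
        raise RuntimeError('batch submission failed (see slurm/lsf debug log)')
    return localid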