def send_job(job_options, **kwargs):  # pylint: disable=W0613
    """Send a job to the workload manager.

    Unless the instance runtime property ``simulate`` is truthy, an SSH
    connection is opened with the stored credentials and the job is
    submitted through the workload manager named by the
    ``workload_manager`` runtime property. On success the job name is
    stored in the ``job_name`` runtime property.

    Args:
        job_options: Options forwarded unchanged to ``wm.submit_job``.
        **kwargs: Must contain ``name``, the name of the job.

    Raises:
        NonRecoverableError: If the workload manager type is not supported
            or the job could not be submitted.
    """
    simulate = ctx.instance.runtime_properties['simulate']

    name = kwargs['name']
    is_singularity = 'croupier.nodes.SingularityJob' in ctx.node.\
        type_hierarchy

    if not simulate:
        workdir = ctx.instance.runtime_properties['workdir']
        wm_type = ctx.instance.runtime_properties['workload_manager']
        client = SshClient(ctx.instance.runtime_properties['credentials'])

        try:
            wm = WorkloadManager.factory(wm_type)
            if not wm:
                raise NonRecoverableError(
                    "Workload Manager '" + wm_type + "' not supported.")
            context_vars = {
                'CFY_EXECUTION_ID': ctx.execution_id,
                'CFY_JOB_NAME': name
            }
            is_submitted = wm.submit_job(client,
                                         name,
                                         job_options,
                                         is_singularity,
                                         ctx.logger,
                                         workdir=workdir,
                                         context=context_vars)
        finally:
            # Always release the SSH connection, including when the
            # submission itself raises.
            client.close_connection()
    else:
        ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')
        is_submitted = True

    if is_submitted:
        ctx.logger.info('Job ' + name + ' (' + ctx.instance.id + ') sent.')
    else:
        ctx.logger.error(
            'Job ' + name + ' (' + ctx.instance.id + ') not sent.')
        raise NonRecoverableError(
            'Job ' + name + ' (' + ctx.instance.id + ') not sent.')

    ctx.instance.runtime_properties['job_name'] = name
def get_states(self, credentials, job_names):
    """Read the job states from ``croupier-monitor.dat`` over SSH.

    The file is read with ``cat`` from ``self.workdir`` on the remote host.

    Args:
        credentials: Credentials used to build the ``SshClient``.
        job_names: Iterable of job names; each gets an empty audit entry.

    Returns:
        A ``(states, audits)`` tuple. ``states`` is the result of
        ``self._parse_states`` on the command output, or an empty dict
        when the command exits with a non-zero code. ``audits`` maps
        every name in ``job_names`` to an empty dict.
    """
    call = "cat croupier-monitor.dat"

    client = SshClient(credentials)
    try:
        output, exit_code = client.execute_shell_command(
            call,
            workdir=self.workdir,
            wait_result=True)
    finally:
        # Release the connection even if the remote command raises.
        client.close_connection()

    states = self._parse_states(output) if exit_code == 0 else {}
    audits = {job_name: {} for job_name in job_names}

    return states, audits
def get_states(self, workdir, credentials, job_names, logger):
    """Read the job states from ``msomonitor.data`` over SSH.

    Args:
        workdir: Remote directory in which the ``cat`` command is run.
        credentials: Credentials used to build the ``SshClient``.
        job_names: Not used by this implementation.
        logger: Logger passed to ``self._parse_states`` and used to
            report a failed read.

    Returns:
        The result of ``self._parse_states`` on the command output, or an
        empty dict when the command exits with a non-zero code.
    """
    # TODO set start time of consulting
    # (sacct only check current day)
    call = "cat msomonitor.data"

    client = SshClient(credentials)
    try:
        output, exit_code = client.execute_shell_command(
            call,
            workdir=workdir,
            wait_result=True)
    finally:
        # Release the connection even if the remote command raises.
        client.close_connection()

    states = {}
    if exit_code == 0:
        states = self._parse_states(output, logger)
    else:
        # Make the failure visible instead of silently returning nothing.
        logger.warning("Failed to get states")

    return states
def publish(publish_list, **kwargs):
    """Publish the job outputs.

    If the ``simulate`` runtime property is missing, a warning is logged
    and nothing is published. Otherwise, unless simulating, each item in
    ``publish_list`` is published through the external repository built by
    ``ExternalRepository.factory``, stopping at the first item whose
    ``publish`` call returns a falsy value.

    Any exception raised while publishing (including the
    ``NonRecoverableError`` raised here) is printed, logged as an error
    and not propagated.

    Args:
        publish_list: Iterable of publish item descriptions.
        **kwargs: Must contain ``name``, the name of the job.
    """
    try:
        simulate = ctx.instance.runtime_properties['simulate']
    except KeyError:
        # The job wasn't configured properly, no need to publish
        ctx.logger.warning('Job outputs where not published as' +
                           ' the job was not configured properly.')
        return

    try:
        name = kwargs['name']
        published = True
        if not simulate:
            workdir = ctx.instance.runtime_properties['workdir']
            client = SshClient(ctx.instance.runtime_properties['credentials'])

            try:
                for publish_item in publish_list:
                    if not published:
                        break
                    exrep = ExternalRepository.factory(publish_item)
                    if not exrep:
                        raise NonRecoverableError(
                            "External repository '" +
                            publish_item['dataset']['type'] +
                            "' not supported.")
                    published = exrep.publish(client, ctx.logger, workdir)
            finally:
                # Close the connection on every path, including errors
                # raised by the repository while publishing.
                client.close_connection()
        else:
            ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')

        if published:
            ctx.logger.info(
                'Job ' + name + ' (' + ctx.instance.id + ') published.')
        else:
            ctx.logger.error(
                'Job ' + name + ' (' + ctx.instance.id + ') not published.')
            raise NonRecoverableError(
                'Job ' + name + ' (' + ctx.instance.id + ') not published.')
    except Exception as exp:
        print(traceback.format_exc())
        # str(exp) instead of exp.message: the latter is not available on
        # Python 3 exceptions and would raise AttributeError here.
        ctx.logger.error('Cannot publish: ' + str(exp))
def get_states(self, workdir, credentials, job_names, logger):
    """Query ``sacct`` over SSH for the state of the given jobs.

    Args:
        workdir: Remote directory in which the command is run.
        credentials: Credentials used to build the ``SshClient``.
        job_names: Job names, joined with commas into ``--name=``.
        logger: Logger passed to ``self._parse_states`` and used to
            report a failed query.

    Returns:
        The result of ``self._parse_states`` on the command output, or an
        empty dict when the command exits with a non-zero code.
    """
    # TODO set start time of consulting
    # (sacct only check current day)
    call = "sacct -n -o JobName,State -X -P --name=" + ','.join(job_names)

    client = SshClient(credentials)
    try:
        output, exit_code = client.execute_shell_command(
            call,
            workdir=workdir,
            wait_result=True)
    finally:
        # Release the connection even if the remote command raises.
        client.close_connection()

    states = {}
    if exit_code == 0:
        states = self._parse_states(output, logger)
    else:
        logger.warning("Failed to get states")

    return states
def deploy_job(script,
               inputs,
               credentials,
               wm_type,
               workdir,
               name,
               logger,
               skip_cleanup):  # pylint: disable=W0613
    """Exec a deployment job script that receives SSH credentials as input.

    The script resource is written to the remote ``workdir`` through the
    workload manager, executed as ``./<name>`` with ``inputs`` as
    arguments, and removed afterwards unless ``skip_cleanup`` is set.

    Args:
        script: Resource path handed to ``ctx.get_resource``.
        inputs: Iterable of arguments appended to the call. Values that
            contain a space or newline and do not already start with a
            double quote are wrapped in double quotes.
        credentials: Credentials used to build the ``SshClient``.
        wm_type: Workload manager type for ``WorkloadManager.factory``.
        workdir: Remote working directory.
        name: Name of the remote script file.
        logger: Logger used for warnings.
        skip_cleanup: If truthy, the remote script is not removed.

    Returns:
        True if the script was created and exited with code 0, else False.

    Raises:
        NonRecoverableError: If the workload manager type is not supported.
    """
    wm = WorkloadManager.factory(wm_type)
    if not wm:
        raise NonRecoverableError(
            "Workload Manager '" + wm_type + "' not supported.")

    # Execute the script and manage the output
    success = False
    client = SshClient(credentials)
    try:
        if wm._create_shell_script(client,
                                   name,
                                   ctx.get_resource(script),
                                   logger,
                                   workdir=workdir):
            call = "./" + name
            for dinput in inputs:
                str_input = str(dinput)
                if ('\n' in str_input or ' ' in str_input) and \
                        str_input[0] != '"':
                    call += ' "' + str_input + '"'
                else:
                    call += ' ' + str_input
            _, exit_code = client.execute_shell_command(
                call,
                workdir=workdir,
                wait_result=True)
            if exit_code != 0:
                logger.warning(
                    "failed to deploy job: call '" + call + "', exit code " +
                    str(exit_code))
            else:
                success = True

            if not skip_cleanup:
                if not client.execute_shell_command(
                        "rm " + name,
                        workdir=workdir):
                    logger.warning("failed removing bootstrap script")
    finally:
        # Close the connection even if creating or running the script
        # raises, so the SSH session is never leaked.
        client.close_connection()

    return success
def cleanup_execution(config, credentials, skip, simulate,
                      **kwargs):  # pylint: disable=W0613
    """Clean the execution working directory.

    Removes the ``workdir`` stored in the instance runtime properties with
    ``rm -r`` over SSH. Credentials stored in the runtime properties take
    precedence over the ``credentials`` argument.

    Args:
        config: Mapping that must contain ``workload_manager``.
        credentials: Fallback SSH credentials.
        skip: If truthy, nothing is done.
        simulate: If truthy, only a warning is logged.
        **kwargs: Unused.

    Raises:
        NonRecoverableError: If the workload manager type is not supported.
    """
    if skip:
        return

    ctx.logger.info('Cleaning up...')
    if not simulate:
        workdir = ctx.instance.runtime_properties['workdir']
        wm_type = config['workload_manager']
        wm = WorkloadManager.factory(wm_type)
        if not wm:
            raise NonRecoverableError(
                "Workload Manager '" + wm_type + "' not supported.")

        if 'credentials' in ctx.instance.runtime_properties:
            credentials = ctx.instance.runtime_properties['credentials']
        client = SshClient(credentials)
        try:
            _, exit_code = client.execute_shell_command(
                'rm -r ' + workdir,
                wait_result=True)
        finally:
            # Release the connection even if the remote command raises.
            client.close_connection()

        if exit_code == 0:
            ctx.logger.info('..all clean.')
        else:
            # Best effort: report the failure instead of claiming success.
            ctx.logger.warning(
                'could not remove working directory ' + workdir +
                ' (exit code ' + str(exit_code) + ')')
    else:
        ctx.logger.warning('clean up simulated.')
def get_states(self, workdir, credentials, job_names, logger):
    """Query the frameworks endpoint on the remote host for the job state.

    Opens an SSH connection (retrying up to five times on authentication
    failure, five seconds apart) and runs ``curl`` against
    ``localhost:5050/frameworks``, authenticating as the SSH user with the
    secret read from ``/security/secrets/<user>.mesos``.

    Args:
        workdir: Remote directory in which the command is run.
        credentials: Credentials used to build the ``SshClient``.
        job_names: Job names; only the first one is looked up.
        logger: Logger used for debug and warning output.

    Returns:
        The result of ``self._parse_frameworks_states`` for
        ``job_names[0]``, or an empty dict if the connection could not be
        established or the command exits with a non-zero code.
    """
    import time

    max_attempts = 5
    retry_delay = 5

    states = {}
    frameinfo = getframeinfo(currentframe())
    logger.debug("{2}: {0} - {1}".format(frameinfo.filename,
                                         frameinfo.lineno,
                                         frameinfo.function))

    call = "curl http://{0}:`cat /security/secrets/{0}.mesos" + \
           "`@localhost:5050/frameworks"

    client = None
    for attempt in range(max_attempts):
        try:
            client = SshClient(credentials)
            # Stop at the first successful connection so that no extra
            # clients are opened and left unclosed.
            break
        except AuthenticationException as ae:
            logger.debug(ae)
            if attempt < max_attempts - 1:
                time.sleep(retry_delay)

    if client is None:
        # Every attempt failed; there is nothing to query or to close.
        logger.warning(
            "failed to get states: could not authenticate after {0} "
            "attempts".format(max_attempts))
        return states

    try:
        user = client._user
        call_format = call.format(user)
        logger.debug("{2}: cal_fmt: {0}, usr: {1}".format(
            call_format, user, frameinfo.function))

        output, exit_code = client.execute_shell_command(
            call_format,
            workdir=workdir,
            wait_result=True)

        if exit_code == 0:
            json_output = json.loads(output)
            states = self._parse_frameworks_states(json_output,
                                                   job_names[0],
                                                   logger)
        else:
            logger.warning("failed to get states from {0}".format(
                call_format))

        logger.debug("{0}: job_state:{1}".format(frameinfo.function,
                                                 states))
    finally:
        # Close the connection even if the command or JSON parsing raises.
        client.close_connection()

    return states
def process_http_transfer(self):
    """Download the source data over HTTP onto the target infrastructure.

    Builds a ``wget`` command (and an equivalent ``curl`` fallback) from
    ``self.dt_config`` and runs it over SSH in the target's ``workdir``.
    If ``wget`` exits with a non-zero code, ``curl`` is tried.

    The source URL is the ``resource`` itself when it starts with
    ``http``; otherwise it is ``endpoint`` and ``resource`` joined with a
    single slash. Source credentials are added either as user/password or
    as an authentication header.

    Raises:
        CommandExecutionError: If both ``wget`` and ``curl`` fail.
        Exception: Any other error is logged and re-raised unchanged.
    """
    ssh_client = None
    try:
        ctx.logger.info(
            'Processing http data transfer from source {} to target {}'.
            format(self.dt_config['from_source']['name'],
                   self.dt_config['to_target']['name']))
        # Copy source data into target data by invoking wget command at
        # target data infrastructure
        # Create wget command
        # Invoke command in target infrastructure

        # Source DS
        resource = self.dt_config['from_source']['resource']
        endpoint = self.dt_config['from_source']['located_at']['endpoint']
        url = resource if resource.startswith('http') else \
            '{endpoint}/{resource}'.format(
                endpoint=endpoint[:-1] if endpoint.endswith('/')
                else endpoint,
                resource=resource[1:] if resource.startswith('/')
                else resource)

        # Target DS
        to_target_type = self.dt_config['to_target']['type']
        to_target_data_url = None
        if 'FileDataSource' in to_target_type:
            to_target_data_url = self.dt_config['to_target']['filepath']
        workdir = self.dt_config['to_target']['located_at']['workdir']

        to_target_infra_credentials = self.dt_config['to_target'][
            'located_at']['credentials']

        target_is_file = isFile(to_target_data_url)

        # Specifying target to copy using wget
        if target_is_file:
            wget_command = 'wget {url} -O {ds_target}'.format(
                url=url, ds_target=to_target_data_url)
            curl_command = 'curl {url} -o {ds_target}'.format(
                url=url, ds_target=to_target_data_url)
        else:
            wget_command = 'wget {url} -P {ds_target}'.format(
                url=url, ds_target=to_target_data_url)
            # '&&' (not '&'): curl must run inside the target directory
            # and only after the cd succeeded. A single '&' would
            # background the cd and download into the working directory.
            curl_command = 'cd {ds_target} && curl -O {url}'.format(
                url=url, ds_target=to_target_data_url)

        source_credentials = self.dt_config['from_source']['located_at'][
            'credentials']
        if 'user' in source_credentials and \
                'password' in source_credentials and \
                source_credentials['user'] and \
                source_credentials['password']:
            user = source_credentials['user']
            password = source_credentials['password']
            wget_command += ' --user {0} --password {1}'.format(
                user, password)
            curl_command += ' -u {0}:{1}'.format(user, password)
        elif 'auth-header' in source_credentials and source_credentials[
                'auth-header']:
            auth_header = ' --header \'' + source_credentials[
                'auth-header-label'] + ': ' + source_credentials[
                'auth-header'] + '\''
            wget_command += auth_header
            curl_command += auth_header

        ssh_client = SshClient(to_target_infra_credentials)

        # Execute data transfer command
        exit_msg, exit_code = ssh_client.execute_shell_command(
            wget_command, workdir=workdir, wait_result=True)

        if exit_code != 0:
            error_msg = 'Could not download using wget, trying with curl (exit code: {0}, error:{1})\n'.format(
                str(exit_code), exit_msg)
            ctx.logger.warning(error_msg)
            exit_msg, exit_code = ssh_client.execute_shell_command(
                curl_command, workdir=workdir, wait_result=True)
            if exit_code != 0:
                error_msg = 'Could not download using curl (exit code: {0}, error:{1})\n'.format(
                    str(exit_code), exit_msg)
                raise CommandExecutionError(error_msg)
            else:
                ctx.logger.info("Data downloaded successfully with curl")
        else:
            ctx.logger.info("Data downloaded successfully with wget")

    except Exception as exp:
        ctx.logger.error(
            "There was a problem executing the data transfer: " + str(exp))
        raise
    finally:
        # ssh_client stays None when the failure happened before the
        # connection was opened.
        if ssh_client is not None:
            ssh_client.close_connection()
def process_rsync_transfer(self, rsync_source_to_target):
    """Transfer data between source and target infrastructures with rsync.

    The rsync command is executed over SSH on one side of the transfer and
    authenticates against the other side with that side's credentials:

    * ``rsync_source_to_target`` truthy: connect to the source
      infrastructure and push to the target.
    * otherwise: connect to the target infrastructure and pull from the
      source.

    With user/password credentials the command goes through ``sshpass``.
    With user/private_key credentials the key is written to a temporary
    file, sent to the connected host with ``sendKeyFile`` and referenced
    from the home directory in the rsync command.

    Args:
        rsync_source_to_target: Direction of the transfer, see above.

    Raises:
        CommandExecutionError: On any failure, including a non-zero rsync
            exit code or credentials that match neither supported scheme.
    """
    ssh_client = None
    ftp_client = None

    try:
        ctx.logger.info('Processing rsync data transfer')
        # Copy source data into target data by invoking rsync command at
        # target data infrastructure.
        # Create rsync command (check available credentials for target
        # data infrastructure).
        # If credential include user/password, rsync command is:
        #   rsync -ratlz --rsh="/usr/bin/sshpass -p <passwd> ssh
        #   -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -l <user>"
        #   <source files to copy> <HPC remote server>:<target folder>
        # If credential include user/key, rsync command is:
        #   rsync -ratlz -e "ssh -o IdentitiesOnly=yes -i <key_file>"
        #   <files to copy> <user>@<HPC remote server>:<target folder>
        # Copy key in temporary file and destroy it (whatsoever) after
        # usage (or failure).
        # Invoke command in target infrastructure.
        # NOTE(review): the key file sent to the remote host is not removed
        # here; confirm whether sendKeyFile or the caller cleans it up.

        dt_command = None

        # Source DS
        from_source_type = self.dt_config['from_source']['type']
        from_source_data_url = None
        if 'FileDataSource' in from_source_type:
            from_source_data_url = self.dt_config['from_source'][
                'filepath']
        from_source_infra_endpoint = self.dt_config['from_source'][
            'located_at']['endpoint']
        from_source_infra_credentials = self.dt_config['from_source'][
            'located_at']['credentials']

        # Target DS
        to_target_type = self.dt_config['to_target']['type']
        to_target_data_url = None
        if 'FileDataSource' in to_target_type:
            to_target_data_url = self.dt_config['to_target']['filepath']
        to_target_infra_endpoint = self.dt_config['to_target'][
            'located_at']['endpoint']
        to_target_infra_credentials = self.dt_config['to_target'][
            'located_at']['credentials']

        if rsync_source_to_target:
            credentials = from_source_infra_credentials
        else:
            credentials = to_target_infra_credentials

        ssh_client = SshClient(credentials)
        ftp_client = SFtpClient(credentials)

        if rsync_source_to_target:
            if "user" in to_target_infra_credentials and \
                    "password" in to_target_infra_credentials:
                # NOTE rsync authentication with username/password requires
                # sshpass which it is not installed some HPC frontends
                target_username = to_target_infra_credentials['user']
                target_password = to_target_infra_credentials['password']
                dt_command = 'rsync -ratlz --rsh="/usr/bin/sshpass -p {password} ssh -o StrictHostKeyChecking=no ' \
                             '-o IdentitiesOnly=yes -l {username}" {ds_source} {target_endpoint}:{ds_target}'\
                    .format(
                        username=target_username,
                        password=target_password,
                        target_endpoint=to_target_infra_endpoint,
                        ds_source=from_source_data_url,
                        ds_target=to_target_data_url
                    )
            elif "user" in to_target_infra_credentials and \
                    "private_key" in to_target_infra_credentials:
                target_username = to_target_infra_credentials['user']
                target_key = to_target_infra_credentials['private_key']
                # Save key in temporary file
                with tempfile.NamedTemporaryFile() as key_file:
                    key_file.write(bytes(target_key, 'utf-8'))
                    key_file.flush()
                    key_filepath = key_file.name
                    target_key_filepath = key_file.name.split('/')[-1]
                    # Transfer key_file
                    ftp_client.sendKeyFile(ssh_client, key_filepath,
                                           target_key_filepath)
                dt_command = 'rsync -ratlz -e "ssh -o IdentitiesOnly=yes -o StrictHostKeyChecking=no -i ~/{key_file}" {ds_source} ' \
                             '{username}@{target_endpoint}:{ds_target}'.format(
                                 username=target_username,
                                 key_file=target_key_filepath,
                                 target_endpoint=to_target_infra_endpoint,
                                 ds_source=from_source_data_url,
                                 ds_target=to_target_data_url
                             )
        else:
            if "user" in from_source_infra_credentials and \
                    "password" in from_source_infra_credentials:
                # NOTE rsync authentication with username/password requires
                # sshpass which it is not installed some HPC frontends
                source_username = from_source_infra_credentials['user']
                source_password = from_source_infra_credentials['password']
                dt_command = 'rsync -ratlz --rsh="/usr/bin/sshpass -p {password} ssh -o StrictHostKeyChecking=no ' \
                             '-o IdentitiesOnly=yes -l {username}" {source_endpoint}:{ds_source} {ds_target}'\
                    .format(
                        username=source_username,
                        password=source_password,
                        source_endpoint=from_source_infra_endpoint,
                        ds_source=from_source_data_url,
                        ds_target=to_target_data_url
                    )
            # Check the same 'user' key that is read below; the previous
            # test for 'username' never matched credentials that only
            # carry 'user', and raised KeyError when only 'username' was
            # present.
            elif "user" in from_source_infra_credentials and \
                    "private_key" in from_source_infra_credentials:
                source_username = from_source_infra_credentials['user']
                source_key = from_source_infra_credentials['private_key']
                # Save key in temporary file
                with tempfile.NamedTemporaryFile() as key_file:
                    key_file.write(bytes(source_key, 'utf-8'))
                    key_file.flush()
                    key_filepath = key_file.name
                    source_key_filepath = key_file.name.split('/')[-1]
                    # Transfer key_file
                    ftp_client.sendKeyFile(ssh_client, key_filepath,
                                           source_key_filepath)
                dt_command = 'rsync -ratlz -e "ssh -o IdentitiesOnly=yes -o StrictHostKeyChecking=no -i ~/{key_file}" ' \
                             '{username}@{source_endpoint}:{ds_source} {ds_target}'.format(
                                 username=source_username,
                                 key_file=source_key_filepath,
                                 source_endpoint=from_source_infra_endpoint,
                                 ds_source=from_source_data_url,
                                 ds_target=to_target_data_url
                             )

        if dt_command is None:
            # Neither user/password nor user/private_key was available, so
            # there is no command to run.
            raise CommandExecutionError(
                "No usable credentials (user/password or user/private_key) "
                "to build the rsync command")

        # Execute data transfer command
        # NOTE(review): with password credentials the logged command
        # contains the password in clear text.
        ctx.logger.info(
            'rsync data transfer: executing command: {}'.format(
                dt_command))
        exit_msg, exit_code = ssh_client.execute_shell_command(
            dt_command, wait_result=True)

        if exit_code != 0:
            raise CommandExecutionError(
                "Failed executing rsync data transfer: exit code " +
                str(exit_code) + " and msg: " + exit_msg)

    except Exception as exp:
        raise CommandExecutionError(
            "Failed trying to connect to data source infrastructure: " +
            str(exp))
    finally:
        # Either client may still be None if the failure happened before
        # it was created; closing None would mask the real error.
        if ftp_client is not None:
            ftp_client.close_connection()
        if ssh_client is not None:
            ssh_client.close_connection()