def cleanup_execution(
        config,
        credentials,
        skip,
        simulate,
        **kwargs):  # pylint: disable=W0613
    """Remove the execution working directory through an SSH connection.

    Args:
        config: mapping that provides the 'workload_manager' type.
        credentials: SSH credentials; overridden by the 'credentials'
            runtime property of the instance when that property is set.
        skip: when truthy, return immediately without doing anything.
        simulate: when truthy, only log that the clean up was simulated.
        **kwargs: unused.

    Raises:
        NonRecoverableError: if the workload manager type is not supported.
    """
    if skip:
        return

    ctx.logger.info('Cleaning up...')
    if not simulate:
        workdir = ctx.instance.runtime_properties['workdir']
        wm_type = config['workload_manager']
        # The manager object itself is not used below; the factory call only
        # validates that the configured type is supported.
        wm = WorkloadManager.factory(wm_type)
        if not wm:
            raise NonRecoverableError(
                "Workload Manager '" + wm_type + "' not supported.")

        if 'credentials' in ctx.instance.runtime_properties:
            credentials = ctx.instance.runtime_properties['credentials']

        client = SshClient(credentials)
        try:
            _, exit_code = client.execute_shell_command(
                'rm -r ' + workdir,
                wait_result=True)
        finally:
            # Release the connection even when the remote command raises.
            client.close_connection()

        if exit_code != 0:
            # Best effort: report the failure instead of claiming success.
            ctx.logger.warning(
                "failed removing working directory '" + workdir +
                "', exit code " + str(exit_code))
        else:
            ctx.logger.info('..all clean.')
    else:
        ctx.logger.warning('clean up simulated.')
def configure_execution(
        config,
        credentials,
        base_dir,
        workdir_prefix,
        simulate,
        **kwargs):  # pylint: disable=W0613
    """Check the workload manager connection and create the working directory.

    On success the 'login' and 'workdir' runtime properties of the instance
    are set. In simulation mode they are set to True and "simulation"
    without opening any connection.

    Args:
        config: mapping that provides the 'workload_manager' type.
        credentials: SSH credentials; overridden by the 'credentials'
            runtime property of the instance when that property is set.
        base_dir: base directory handed to wm.create_new_workdir.
        workdir_prefix: prefix for the working directory; an empty string
            selects the blueprint id instead.
        simulate: when truthy, nothing is executed remotely.
        **kwargs: unused.

    Raises:
        NonRecoverableError: if the workload manager type is not supported,
            the connection fails, the probe command exits with a non-zero
            code, or the working directory could not be created.
    """
    ctx.logger.info('Connecting to workload manager..')
    if not simulate:
        wm_type = config['workload_manager']
        ctx.logger.info(' - manager: {wm_type}'.format(wm_type=wm_type))

        wm = WorkloadManager.factory(wm_type)
        if not wm:
            raise NonRecoverableError(
                "Workload Manager '" + wm_type + "' not supported.")

        if 'credentials' in ctx.instance.runtime_properties:
            credentials = ctx.instance.runtime_properties['credentials']
        try:
            client = SshClient(credentials)
        except Exception as exp:
            raise NonRecoverableError(
                "Failed trying to connect to workload manager: " + str(exp))

        try:
            # TODO: use command according to wm
            _, exit_code = client.execute_shell_command(
                'uname',
                wait_result=True)

            # Compare by value: identity checks against int/str literals
            # only work by interpreter accident.
            if exit_code != 0:
                raise NonRecoverableError(
                    "Failed executing on the workload manager: exit code " +
                    str(exit_code))

            # Reaching this point means the probe command succeeded.
            ctx.instance.runtime_properties['login'] = True

            prefix = workdir_prefix
            if workdir_prefix == "":
                prefix = ctx.blueprint.id

            workdir = wm.create_new_workdir(
                client, base_dir, prefix, ctx.logger)
        finally:
            # Single exit point for the connection, also on errors.
            client.close_connection()

        if workdir is None:
            raise NonRecoverableError(
                "failed to create the working directory, base dir: " +
                base_dir)
        ctx.instance.runtime_properties['workdir'] = workdir
        ctx.logger.info('..workload manager ready to be used on ' + workdir)
    else:
        ctx.logger.info(' - [simulation]..')
        ctx.instance.runtime_properties['login'] = True
        ctx.instance.runtime_properties['workdir'] = "simulation"
        ctx.logger.warning('Workload manager connection simulated')
def send_job(job_options, **kwargs):  # pylint: disable=W0613
    """Send a job to the workload manager.

    Reads 'simulate', 'workdir', 'workload_manager' and (for non-K8S
    managers) 'credentials' from the instance runtime properties, and stores
    the job name in the 'job_name' runtime property once it has been sent.

    Args:
        job_options: job settings forwarded unchanged to wm.submit_job.
        **kwargs: must contain 'name', the name of the job.

    Raises:
        KeyError: if 'name' is missing from kwargs.
        NonRecoverableError: if the workload manager type is not supported
            or the job could not be submitted.
    """
    ctx.logger.info('TASKS.PY::SEND_JOB L455')
    simulate = ctx.instance.runtime_properties['simulate']

    name = kwargs['name']
    is_singularity = 'hpc.nodes.SingularityJob' in ctx.node.type_hierarchy

    if not simulate:
        workdir = ctx.instance.runtime_properties['workdir']
        wm_type = ctx.instance.runtime_properties['workload_manager']

        # Validate the manager type first so that no SSH connection is
        # opened just to be closed again for an unsupported type.
        wm = WorkloadManager.factory(wm_type)
        if not wm:
            raise NonRecoverableError(
                "Workload Manager '" + wm_type + "' not supported.")

        # K8S jobs are submitted without an SSH client.
        client = None
        if wm_type != 'K8S':
            client = SshClient(ctx.instance.runtime_properties['credentials'])

        try:
            context_vars = {
                'CFY_EXECUTION_ID': ctx.execution_id,
                'CFY_JOB_NAME': name
            }
            is_submitted = wm.submit_job(client,
                                         name,
                                         job_options,
                                         is_singularity,
                                         ctx.logger,
                                         workdir=workdir,
                                         context=context_vars)
        finally:
            # Release the connection even when the submission raises.
            if client is not None:
                client.close_connection()
    else:
        ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')
        is_submitted = True

    if is_submitted:
        ctx.logger.info('Job ' + name + ' (' + ctx.instance.id + ') sent.')
    else:
        ctx.logger.error(
            'Job ' + name + ' (' + ctx.instance.id + ') not sent.')
        raise NonRecoverableError(
            'Job ' + name + ' (' + ctx.instance.id + ') not sent.')

    ctx.instance.runtime_properties['job_name'] = name
    ctx.logger.info('TASKS.PY::SEND_JOB *** END *** L517')
def cleanup_job(job_options, skip, **kwargs):  # pylint: disable=W0613
    """Clean the aux files of the job.

    Best effort: any error raised while cleaning (including a missing
    'name' in kwargs or an unsupported workload manager) is logged and
    swallowed, never propagated.

    Args:
        job_options: job settings forwarded to wm.clean_job_aux_files.
        skip: when truthy, return without cleaning anything.
        **kwargs: expected to contain 'name', the name of the job.
    """
    # NOTE(review): the two 'PRECONFIGURE_WM' trace messages look copied
    # from another task; they are kept unchanged in case logs are parsed.
    ctx.logger.info('TASKS.PY::PRECONFIGURE_WM L503')
    if skip:
        return

    try:
        simulate = ctx.instance.runtime_properties['simulate']
    except KeyError:
        # The job wasn't configured properly, so no cleanup needed
        ctx.logger.warning('Job was not cleaned up as it was not configured.')
        # Without this return 'simulate' would be unbound further down.
        return

    try:
        name = kwargs['name']
        if not simulate:
            is_singularity = \
                'hpc.nodes.SingularityJob' in ctx.node.type_hierarchy
            workdir = ctx.instance.runtime_properties['workdir']
            wm_type = ctx.instance.runtime_properties['workload_manager']

            # NOTE(review): unlike send_job, no special case for 'K8S'
            # here - confirm whether an SSH client is wanted for it.
            client = SshClient(ctx.instance.runtime_properties['credentials'])
            try:
                wm = WorkloadManager.factory(wm_type)
                if not wm:
                    raise NonRecoverableError(
                        "Workload Manager '" + wm_type + "' not supported.")
                is_clean = wm.clean_job_aux_files(client,
                                                  name,
                                                  job_options,
                                                  is_singularity,
                                                  ctx.logger,
                                                  workdir=workdir)
            finally:
                # Release the connection on every path, errors included.
                client.close_connection()
        else:
            ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')
            is_clean = True

        if is_clean:
            ctx.logger.info(
                'Job ' + name + ' (' + ctx.instance.id + ') cleaned.')
        else:
            ctx.logger.error('Job ' + name + ' (' + ctx.instance.id +
                             ') not cleaned.')
    except Exception as exp:
        print(traceback.format_exc())
        # str(exp) instead of exp.message: the attribute is gone in Python 3.
        ctx.logger.error(
            'Something happend when trying to clean up: ' + str(exp))
    ctx.logger.info('TASKS.PY::PRECONFIGURE_WM L573')
def stop_job(job_options, **kwargs):  # pylint: disable=W0613
    """Stop a job in the workload manager.

    Best effort: every error raised while stopping - including the
    NonRecoverableError raised below when the job could not be stopped - is
    caught by the enclosing handler, logged and not propagated.

    Args:
        job_options: job settings forwarded to wm.stop_job.
        **kwargs: expected to contain 'name', the name of the job.
    """
    ctx.logger.info('TASKS.PY::STOP_JOB L557')

    try:
        simulate = ctx.instance.runtime_properties['simulate']
    except KeyError:
        # The job wasn't configured properly, no need to be stopped
        ctx.logger.warning('Job was not stopped as it was not configured.')
        # Without this return 'simulate' would be unbound further down.
        return

    try:
        name = kwargs['name']
        is_singularity = 'hpc.nodes.SingularityJob' in ctx.node.type_hierarchy

        if not simulate:
            workdir = ctx.instance.runtime_properties['workdir']
            wm_type = ctx.instance.runtime_properties['workload_manager']

            # NOTE(review): unlike send_job, no special case for 'K8S'
            # here - confirm whether an SSH client is wanted for it.
            client = SshClient(ctx.instance.runtime_properties['credentials'])
            try:
                wm = WorkloadManager.factory(wm_type)
                if not wm:
                    raise NonRecoverableError(
                        "Workload Manager '" + wm_type + "' not supported.")
                is_stopped = wm.stop_job(client,
                                         name,
                                         job_options,
                                         is_singularity,
                                         ctx.logger,
                                         workdir=workdir)
            finally:
                # Release the connection on every path, errors included.
                client.close_connection()
        else:
            ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')
            is_stopped = True

        if is_stopped:
            ctx.logger.info(
                'Job ' + name + ' (' + ctx.instance.id + ') stopped.')
        else:
            ctx.logger.error('Job ' + name + ' (' + ctx.instance.id +
                             ') not stopped.')
            raise NonRecoverableError('Job ' + name + ' (' + ctx.instance.id +
                                      ') not stopped.')
    except Exception as exp:
        print(traceback.format_exc())
        # str(exp) instead of exp.message: the attribute is gone in Python 3.
        ctx.logger.error(
            'Something happend when trying to stop: ' + str(exp))
    ctx.logger.info('TASKS.PY::STOP_JOB L628')
def deploy_job(script,
               inputs,
               credentials,
               wm_type,
               workdir,
               name,
               logger,
               skip_cleanup):  # pylint: disable=W0613
    """Exec a deployment job script that receives SSH credentials as input.

    The script resource is written to the working directory as a shell
    script called `name`, executed with `inputs` as arguments and, unless
    `skip_cleanup` is truthy, removed afterwards.

    Args:
        script: resource path resolved through ctx.get_resource.
        inputs: iterable of arguments; each one is converted with str().
        credentials: SSH credentials used to open the connection.
        wm_type: workload manager type given to WorkloadManager.factory.
        workdir: directory in which the script is created and executed.
        name: file name of the script inside the working directory.
        logger: logger used for warnings.
        skip_cleanup: when truthy, the script is left in place.

    Returns:
        True if the script ran and exited with code 0, False otherwise.

    Raises:
        NonRecoverableError: if the workload manager type is not supported.
    """
    wm = WorkloadManager.factory(wm_type)
    if not wm:
        raise NonRecoverableError(
            "Workload Manager '" + wm_type + "' not supported.")

    # Execute the script and manage the output
    success = False
    client = SshClient(credentials)
    try:
        if wm._create_shell_script(client,
                                   name,
                                   ctx.get_resource(script),
                                   logger,
                                   workdir=workdir):
            call_parts = ["./" + name]
            for dinput in inputs:
                str_input = str(dinput)
                # Wrap arguments containing blanks or newlines in double
                # quotes, unless they already start with one.
                if ('\n' in str_input or ' ' in str_input) \
                        and str_input[0] != '"':
                    call_parts.append('"' + str_input + '"')
                else:
                    call_parts.append(str_input)
            call = ' '.join(call_parts)

            _, exit_code = client.execute_shell_command(
                call,
                workdir=workdir,
                wait_result=True)
            # Compare by value, not identity, against the int literal.
            if exit_code != 0:
                logger.warning(
                    "failed to deploy job: call '" + call + "', exit code " +
                    str(exit_code))
            else:
                success = True

            if not skip_cleanup:
                if not client.execute_shell_command(
                        "rm " + name,
                        workdir=workdir):
                    logger.warning("failed removing bootstrap script")
    finally:
        # Release the connection even when one of the commands raises.
        client.close_connection()

    return success
def cleanup_execution(
        config,
        credentials,
        skip,
        simulate,
        **kwargs):  # pylint: disable=W0613
    """Remove the execution working directory.

    For every workload manager except 'K8S' the directory is removed with
    'rm -r' over SSH; for 'K8S' it is removed from the local file system.
    Removal failures are logged and do not raise.

    Args:
        config: mapping that provides the 'workload_manager' type.
        credentials: SSH credentials; overridden by the 'credentials'
            runtime property of the instance when that property is set.
        skip: when truthy, return without cleaning anything.
        simulate: when truthy, only log that the clean up was simulated.
        **kwargs: unused.

    Raises:
        NonRecoverableError: if the workload manager type is not supported.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import shutil

    ctx.logger.info('TASKS.PY::CLEANUP_EXECUTION L148')
    if skip:
        ctx.logger.info('TASKS.PY::CLEANUP_EXECUTION *** SKIP *** L154')
        return

    ctx.logger.info('Cleaning up...')
    if not simulate:
        workdir = ctx.instance.runtime_properties['workdir']
        wm_type = config['workload_manager']
        # The manager object itself is not used below; the factory call only
        # validates that the configured type is supported.
        wm = WorkloadManager.factory(wm_type)
        if not wm:
            raise NonRecoverableError(
                "Workload Manager '" + wm_type + "' not supported.")

        if 'credentials' in ctx.instance.runtime_properties:
            credentials = ctx.instance.runtime_properties['credentials']

        cleaned = True
        if wm_type != 'K8S':
            client = SshClient(credentials)
            try:
                _, exit_code = client.execute_shell_command(
                    'rm -r ' + workdir,
                    wait_result=True)
            finally:
                # Release the connection even when the command raises.
                client.close_connection()
            if exit_code != 0:
                cleaned = False
                ctx.logger.warning(
                    "failed removing working directory '" + workdir +
                    "', exit code " + str(exit_code))
        else:
            try:
                # Recursive removal, matching 'rm -r' in the SSH branch;
                # os.rmdir would fail on any non-empty directory.
                shutil.rmtree(workdir)
            except Exception as exp:
                cleaned = False
                print(traceback.format_exc())
                # str(exp): exp.message does not exist on Python 3.
                ctx.logger.error(
                    'Something happend when trying to clean up: ' + str(exp))

        if cleaned:
            ctx.logger.info('..all clean.')
    else:
        ctx.logger.warning('clean up simulated.')
    ctx.logger.info('TASKS.PY::CLEANUP_EXECUTION *** END *** L179')
def __init__(self, methodName='runTest'):
    """Initialise the test case with a 'TestTorque' logger and the workload
    manager returned by WorkloadManager.factory("TORQUE")."""
    super(TestTorque, self).__init__(methodName)
    self.logger = logging.getLogger('TestTorque')
    self.wm = WorkloadManager.factory("TORQUE")
def __init__(self, methodName='runTest'):
    """Initialise the test case with a 'TestSlurm' logger and the workload
    manager returned by WorkloadManager.factory("SLURM")."""
    super(TestSlurm, self).__init__(methodName)
    self.logger = logging.getLogger('TestSlurm')
    self.wm = WorkloadManager.factory("SLURM")
def __init__(self, methodName='runTest'):
    """Initialise the test case with a 'TestK8s' logger and the workload
    manager returned by WorkloadManager.factory("K8S")."""
    super(TestK8s, self).__init__(methodName)
    self.logger = logging.getLogger('TestK8s')
    self.wm = WorkloadManager.factory("K8S")