def update_args(self, override_args):
    """Merge new argument values into this object and refresh derived state.

    The dictionaries tracking input and output files are rebuilt from the
    merged arguments, and a `FileStageManager` is installed when a usable
    'scratch' argument is present.

    Parameters
    ----------
    override_args : dict
        Dictionary of arguments to override the current values
    """
    merged_args = extract_arguments(override_args, self.args, self.mapping)
    self.args = merged_args
    self._latch_file_info()

    scratch_area = self.args.get('scratch')
    # Both a missing value and the literal string 'None' mean "no scratch area".
    if scratch_area is None or scratch_area == 'None':
        return
    self._file_stage = FileStageManager(scratch_area, '.')
def update_args(self, override_args):
    """Update the arguments used to invoke the application.

    Note that this will also update the dictionary of input and output files.

    Parameters
    ----------
    override_args : dict
        Dictionary of arguments to override the current values
    """
    self.args = extract_arguments(override_args, self.args, self.mapping)
    self._latch_file_info()
    scratch_dir = self.args.get('scratch', None)
    # The literal string 'None' is treated as "not set" so that no stage
    # manager is created for a directory actually named 'None'.
    if scratch_dir is not None and scratch_dir != 'None':
        self._file_stage = FileStageManager(scratch_dir, '.')
def update_args(self, override_args):
    """Merge new argument values and propagate them to the contained links.

    After merging, the arguments are mapped via `self._map_arguments`, the
    file staging manager is (re)built when a 'scratch' argument is set, and
    finally the dictionaries of input and output files are refreshed.

    Parameters
    ----------
    override_args : dict
        Dictionary of arguments to override the current values; the merged
        result is what gets passed to `self._map_arguments`
    """
    self.args = extract_arguments(override_args, self.args)
    self._map_arguments(self.args)

    scratch_area = self.args.get('scratch')
    if is_not_null(scratch_area):
        self._file_stage = FileStageManager(scratch_area, '.')
        # NOTE(review): this loop is assumed to be nested under the guard,
        # i.e. links are only re-pointed when a new stage manager is built.
        for sub_link in self._links.values():
            sub_link._set_file_stage(self._file_stage)

    # File bookkeeping is refreshed last, after the arguments are mapped.
    self._latch_file_info()
class Link(object):
    """A wrapper for a command line application.

    This class keeps track of the arguments to pass to the application
    as well as input and output files.

    This can be used either with other `Link` objects to build a chain, or as
    a standalone wrapper to pass configuration to the application.

    Parameters
    ----------
    appname : str
        Name of the application
    args : dict
        Up-to-date dictionary with the arguments that will be passed to
        the application
    defaults : dict
        Dictionary with defaults values for the arguments
    options : dict
        Dictionary with the options that we are allowed to set and
        default values
    mapping : dict
        Dictionary remapping keys in options to arguments sent to the
        application.  This is useful when two ScienceTools use different
        names for what is effectively the same parameter
    files : `FileDict`
        Object that keeps track of input and output files
    jobs : `OrderedDict`
        Dictionary mapping keys to `JobDetails`
    """

    def __init__(self, linkname, **kwargs):
        """C'tor

        Parameters
        ----------
        linkname : str
            Unique name of this particular link

        Keyword arguments
        -----------------
        appname : str
            Name of the application (e.g., gtbin); defaults to `linkname`
        parser : `argparse.ArgumentParser`
            Parser with the options that we are allowed to set and
            default values
        options : dict
            Dictionary with the tuples defining the options that we are
            allowed to set and default values
        mapping : dict
            Dictionary remapping input argument names.  This is useful
            when two `Link` use different names for what is effectively
            the same parameter
        file_args : dict
            Dictionary mapping argument to `FileFlags` enum
        file_stage : `FileStageManager`
            Manager for staging files to and from a scratch area
        """
        self.linkname = linkname
        self.appname = kwargs.pop('appname', linkname)
        self.mapping = kwargs.pop('mapping', {})
        self._parser = kwargs.pop('parser', None)
        self._file_stage = kwargs.pop('file_stage', None)
        # Copy the options so later updates do not touch the caller's dict.
        self._options = {}
        self._options.update(kwargs.pop('options', {}))
        if self._parser is not None:
            self.fill_argparser(self._parser)
        self.args = {}
        self.args.update(convert_option_dict_to_dict(self._options))
        # Whatever keywords were not popped above are forwarded to FileDict.
        self.files = FileDict(**kwargs)
        self.sub_files = FileDict()
        self.jobs = OrderedDict()

    def print_summary(self, stream=sys.stdout, indent="", recurse_level=2):
        """Print a summary of the activity done by this `Link`.

        Parameters
        ----------
        stream : `file`
            Stream to print to
        indent : str
            Indentation at start of line
        recurse_level : int
            Number of recursion levels to print; nothing is printed when
            this is negative
        """
        if recurse_level < 0:
            return
        stream.write("%sLink: %s\n" % (indent, self.linkname))
        stream.write("%sN_jobs: %s\n" % (indent, len(self.get_jobs())))
        self.sub_files.print_chain_summary(stream, indent)

    def _get_args(self):
        """Internal function to cast self._options into dictionary

        Returns dict with argument key : value pairings
        """
        args = {}
        args.update(convert_option_dict_to_dict(self._options))
        return args

    def _latch_file_info(self):
        """Internal function to update the dictionaries
        keeping track of input and output files
        """
        self.files.file_dict.clear()
        self.files.latch_file_info(self.args)

    def update_options(self, input_dict):
        """Update the values in self._options

        Only the first element (the value) of each option tuple is
        replaced; the second and third elements are carried over.

        Parameters
        ----------
        input_dict : dict
            Dictionary with argument key : value pairings

        Raises
        ------
        KeyError
            If a key of `input_dict` is not an existing option
        """
        for key, value in input_dict.items():
            current = self._options[key]
            self._options[key] = (value, current[1], current[2])

    def update_args(self, override_args):
        """Update the arguments used to invoke the application

        Note that this will also update the dictionary of input
        and output files.

        Parameters
        ----------
        override_args : dict
            Dictionary of arguments to override the current values
        """
        self.args = extract_arguments(override_args, self.args, self.mapping)
        self._latch_file_info()
        scratch_dir = self.args.get('scratch', None)
        # The literal string 'None' is treated as "not set" so that no stage
        # manager is created for a directory actually named 'None'.
        if scratch_dir is not None and scratch_dir != 'None':
            self._file_stage = FileStageManager(scratch_dir, '.')

    def get_failed_jobs(self):
        """Return a dictionary with the subset of jobs that are marked as failed"""
        failed_jobs = {}
        for job_key, job_details in self.jobs.items():
            if job_details.status == JobStatus.failed:
                failed_jobs[job_key] = job_details
        return failed_jobs

    def get_jobs(self, recursive=True):
        """Return a dictionary with all the jobs

        If `recursive` is True a shallow copy of the job dictionary is
        returned, otherwise the job dictionary itself is returned.
        """
        if recursive:
            return self.jobs.copy()
        return self.jobs

    @property
    def arg_names(self):
        """Return the list of arg names """
        # Build a flat list of names rather than a list wrapping the keys.
        return list(self.args.keys())

    def update_sub_file_dict(self, sub_files):
        """Update a file dict with information from self

        `sub_files` is cleared and then refilled from the file
        dictionaries stored with each job in self.jobs.
        """
        sub_files.file_dict.clear()
        for job_details in self.jobs.values():
            if job_details.file_dict is not None:
                sub_files.update(job_details.file_dict)
            if job_details.sub_file_dict is not None:
                sub_files.update(job_details.sub_file_dict)

    def check_input_files(self, return_found=True, return_missing=True):
        """Check if input files exist.

        The input files of this link and of its sub-files are passed to
        `check_files` together with the two flags; the result of that
        call is returned unchanged.
        """
        all_input_files = self.files.chain_input_files + self.sub_files.chain_input_files
        return check_files(all_input_files, self._file_stage,
                           return_found, return_missing)

    def check_output_files(self, return_found=True, return_missing=True):
        """Check if output files exist.

        The output files of this link and of its sub-files are passed to
        `check_files` together with the two flags; the result of that
        call is returned unchanged.
        """
        all_output_files = self.files.chain_output_files + self.sub_files.chain_output_files
        return check_files(all_output_files, self._file_stage,
                           return_found, return_missing)

    def missing_input_files(self):
        """Make and return a dictionary of the missing input files.

        This returns a dictionary mapping
        filepath to list of links that use the file as input.
        """
        missing = self.check_input_files(return_found=False)
        ret_dict = {}
        for miss_file in missing:
            ret_dict[miss_file] = [self.linkname]
        return ret_dict

    def missing_output_files(self):
        """Make and return a dictionary of the missing output files.

        This returns a dictionary mapping
        filepath to list of links that produce the file as output.
        """
        missing = self.check_output_files(return_found=False)
        ret_dict = {}
        for miss_file in missing:
            ret_dict[miss_file] = [self.linkname]
        return ret_dict

    def formatted_command(self):
        """Build and return the formatted command for this `Link`.

        This is exactly the command as called from the Unix command line.
        """
        command_template = self.command_template()
        format_dict = self.args.copy()
        return command_template.format(**format_dict)

    def make_argv(self):
        """Generate the vector of arguments for this `Link`.

        This is the formatted command split on whitespace, with the
        leading application name dropped.
        """
        command = self.formatted_command()
        tokens = command.split()
        return tokens[1:]

    def pre_run_checks(self, stream=sys.stdout, dry_run=False):
        """Do some checks before running this link

        This checks if input and output files are present.

        If input files are missing this will raise `OSError` if dry_run is False
        If all output files are present this returns False, unless dry_run
        is True.

        Parameters
        ----------
        stream : `file`
            Must have 'write' function
        dry_run : bool
            Print command but do not run it

        Returns bool
            True if it is ok to proceed with running the link
        """
        input_missing = self.check_input_files(return_found=False)
        if len(input_missing) != 0:
            if dry_run:
                stream.write("Input files are missing: %s: %i\n" %
                             (self.linkname, len(input_missing)))
            else:
                raise OSError("Input files are missing: %s" % input_missing)

        output_found, output_missing = self.check_output_files()
        if len(output_missing) == 0:
            stream.write("All output files for %s already exist: %i %i %i\n" %
                         (self.linkname, len(output_found),
                          len(output_missing), len(self.files.output_files)))
            # A dry run still proceeds so that the command gets printed.
            if not dry_run:
                return False
        return True

    def set_file_stage(self, file_stage):
        """Set this link to use a `FileStageManager` to copy files
        to and from a scratch area
        """
        self._file_stage = file_stage

    def run_link(self, stream=sys.stdout, dry_run=False, stage_files=True):
        """Runs this link.

        This checks if input and output files are present.

        If input files are missing this will raise `OSError` if dry_run is False
        If all output files are present this will skip execution.

        Parameters
        ----------
        stream : `file`
            Must have 'write' function
        dry_run : bool
            Print command but do not run it
        stage_files : bool
            Stage files to and from the scratch area
        """
        check_ok = self.pre_run_checks(stream, dry_run)
        if not check_ok:
            return

        if self._file_stage is not None:
            # Mapping the files also rewrites self.args to the scratch paths.
            input_file_mapping, output_file_mapping = self.map_scratch_files(
                self.files)
            if stage_files:
                self._file_stage.make_scratch_dirs(input_file_mapping, dry_run)
                self._file_stage.make_scratch_dirs(
                    output_file_mapping, dry_run)
                self.stage_input_files(input_file_mapping, dry_run)

        self.run_command(stream, dry_run)
        if self._file_stage is not None and stage_files:
            self.stage_output_files(output_file_mapping, dry_run)
        self.finalize(dry_run)

    def run_command(self, stream=sys.stdout, dry_run=False):
        """Runs the command for this link.

        This method can be overridden by sub-classes to invoke a
        different command

        Parameters
        ----------
        stream : `file`
            Must have 'write' function
        dry_run : bool
            Print command but do not run it
        """
        command = self.formatted_command()
        if dry_run:
            stream.write("%s\n" % command)
            stream.flush()
        else:
            os.system(command)

    def run(self, stream=sys.stdout, dry_run=False):
        """Runs this link.

        This version is intended to be overwritten by sub-classes so
        as to provide a single function that behaves the same
        for all version of `Link`

        Parameters
        ----------
        stream : `file`
            Must have 'write' function
        dry_run : bool
            Print command but do not run it
        """
        self.run_link(stream, dry_run)

    def command_template(self):
        """Build and return a string that can be used as a template invoking
        this chain from the command line.

        The actual command can be obtained by using
        `self.command_template().format(**self.args)`
        """
        com_out = self.appname
        arg_string = ""
        flag_string = ""
        # Loop over the key, value pairs in self.args
        for key, val in self.args.items():
            # Fall back on the default stored in self._options when the
            # argument is unset
            if val is None:
                opt_val = self._options[key][0]
            else:
                opt_val = val
            opt_type = self._options[key][2]
            if key == 'args':
                # 'args' is special, pull it out and move it to the back
                arg_string += ' {%s}' % key
            elif opt_type is bool:
                # Booleans become bare flags, emitted only when true
                if opt_val:
                    flag_string += ' --%s' % (key)
            elif opt_type is list:
                # List options are written out literally as positional values
                if opt_val is None:
                    continue
                elif isinstance(opt_val, str):
                    arg_string += ' %s' % opt_val
                elif isinstance(opt_val, list):
                    for arg_val in opt_val:
                        arg_string += ' %s' % arg_val
            else:
                com_out += ' --%s {%s}' % (key, key)
        # Order is: keyword options, then flags, then positional values
        com_out += flag_string
        com_out += arg_string
        return com_out

    def run_argparser(self, argv):
        """Initialize a link with a set of arguments using an
        `argparse.ArgumentParser`

        Raises `ValueError` if the link was built without a parser.
        Returns the namespace produced by the parser.
        """
        if self._parser is None:
            raise ValueError('Link was not given a parser on initialization')
        args = self._parser.parse_args(argv)
        self.update_args(args.__dict__)
        return args

    def fill_argparser(self, parser):
        """Fill an `argparse.ArgumentParser` with the options from this chain
        """
        for key, val in self._options.items():
            add_argument(parser, key, val)

    def create_job_details(self, key, job_config, logfile, status):
        """Create a `JobDetails` for a single job

        Note that this updates the arguments of this link with `job_config`
        before taking copies of the file dictionaries.

        Parameters
        ----------
        key : str
            Key used to identify this particular job
        job_config : dict
            Dictionary with arguments passed to this particular job
        logfile : str
            Name of the associated log file
        status : int
            Current status of the job

        Returns `JobDetails`
        """
        self.update_args(job_config)
        job_details = JobDetails(jobname=self.linkname,
                                 jobkey=key,
                                 appname=self.appname,
                                 logfile=logfile,
                                 job_config=job_config,
                                 timestamp=get_timestamp(),
                                 file_dict=copy.deepcopy(self.files),
                                 sub_file_dict=copy.deepcopy(self.sub_files),
                                 status=status)
        return job_details

    def register_job(self, key, job_config, logfile, status):
        """Create a `JobDetails` for this link
        and add it to the self.jobs dictionary.

        The job is stored under the `fullkey` of the new `JobDetails`.

        Parameters
        ----------
        key : str
            Key used to identify this particular job
        job_config : dict
            Dictionary with arguments passed to this particular job
        logfile : str
            Name of the associated log file
        status : int
            Current status of the job

        Returns `JobDetails`
        """
        job_details = self.create_job_details(key, job_config, logfile, status)
        self.jobs[job_details.fullkey] = job_details
        return job_details

    def map_scratch_files(self, file_dict):
        """Build and return the mapping for copying files to and from scratch area

        Returns a pair (input_file_mapping, output_file_mapping); both are
        empty when no file stage manager is set.  The arguments of this
        link are adjusted according to the mappings as a side effect.
        """
        if self._file_stage is None:
            return ({}, {})
        input_files = file_dict.input_files_to_stage
        output_files = file_dict.output_files_to_stage
        input_file_mapping = self._file_stage.map_files(input_files)
        output_file_mapping = self._file_stage.map_files(output_files)
        self.update_file_args(input_file_mapping)
        self.update_file_args(output_file_mapping)
        return input_file_mapping, output_file_mapping

    def update_file_args(self, file_mapping):
        """Adjust the arguments to deal with staging files to the scratch area

        Every argument whose value is a key of `file_mapping` is replaced
        by the mapped value.
        """
        for key, value in list(self.args.items()):
            try:
                new_value = file_mapping.get(value, value)
            except TypeError:
                # Unhashable values (e.g. list-valued options) can not be
                # keys of the mapping, so there is nothing to replace.
                continue
            if new_value != value:
                self.args[key] = new_value

    def stage_input_files(self, file_mapping, dry_run=True):
        """Stage the input files to the scratch area"""
        print("Staging input ", file_mapping)
        if self._file_stage is None:
            return
        self._file_stage.copy_to_scratch(file_mapping, dry_run)

    def stage_output_files(self, file_mapping, dry_run=True):
        """Stage the output files back from the scratch area"""
        print("Staging output ", file_mapping)
        if self._file_stage is None:
            return
        self._file_stage.copy_from_scratch(file_mapping, dry_run)

    def finalize(self, dry_run=False):
        """Remove / compress files as requested

        With dry_run set the actions are only printed.
        """
        for rmfile in self.files.temp_files:
            if dry_run:
                print("remove %s" % rmfile)
            else:
                os.remove(rmfile)
        for gzfile in self.files.gzip_files:
            if dry_run:
                print("gzip %s" % gzfile)
            else:
                os.system('gzip -9 %s' % gzfile)
class Link(object):
    """A wrapper for a command line application.

    This class keeps track of the arguments to pass to the application
    as well as input and output files.

    This can be used either with other `Link` objects to build a chain, or as
    a standalone wrapper to pass configuration to the application.

    Parameters
    ----------
    appname : str
        Name of the application
    args : dict
        Up-to-date dictionary with the arguments that will be passed to
        the application
    defaults : dict
        Dictionary with defaults values for the arguments
    options : dict
        Dictionary with the options that we are allowed to set and
        default values
    mapping : dict
        Dictionary remapping keys in options to arguments sent to the
        application.  This is useful when two ScienceTools use different
        names for what is effectively the same parameter
    files : `FileDict`
        Object that keeps track of input and output files
    jobs : `OrderedDict`
        Dictionary mapping keys to `JobDetails`
    """

    def __init__(self, linkname, **kwargs):
        """C'tor

        Parameters
        ----------
        linkname : str
            Unique name of this particular link

        Keyword arguments
        -----------------
        appname : str
            Name of the application (e.g., gtbin); defaults to `linkname`
        parser : `argparse.ArgumentParser`
            Parser with the options that we are allowed to set and
            default values
        options : dict
            Dictionary with the tuples defining the options that we are
            allowed to set and default values
        mapping : dict
            Dictionary remapping input argument names.  This is useful
            when two `Link` use different names for what is effectively
            the same parameter
        file_args : dict
            Dictionary mapping argument to `FileFlags` enum
        file_stage : `FileStageManager`
            Manager for staging files to and from a scratch area
        job_archive : object
            Archive that jobs are registered with; only its
            `register_job` and `register_jobs` methods are used here
        """
        self.linkname = linkname
        self.appname = kwargs.pop('appname', linkname)
        self.mapping = kwargs.pop('mapping', {})
        self._parser = kwargs.pop('parser', None)
        # Copy the options so later updates do not touch the caller's dict.
        self._options = {}
        self._options.update(kwargs.pop('options', {}))
        if self._parser is not None:
            self.fill_argparser(self._parser)
        # These two are read with get(), so they stay in kwargs and are
        # also forwarded to FileDict below.
        self._file_stage = kwargs.get('file_stage', None)
        self._job_archive = kwargs.get('job_archive', None)
        self.args = {}
        self.args.update(convert_option_dict_to_dict(self._options))
        self.files = FileDict(**kwargs)
        self.sub_files = FileDict()
        self.jobs = OrderedDict()

    def print_summary(self, stream=sys.stdout, indent="", recurse_level=2):
        """Print a summary of the activity done by this `Link`.

        Parameters
        ----------
        stream : `file`
            Stream to print to
        indent : str
            Indentation at start of line
        recurse_level : int
            Number of recursion levels to print; nothing is printed when
            this is negative
        """
        if recurse_level < 0:
            return
        stream.write("%sLink: %s\n" % (indent, self.linkname))
        stream.write("%sN_jobs: %s\n" % (indent, len(self.get_jobs())))
        self.sub_files.print_chain_summary(stream, indent)

    def _get_args(self):
        """Internal function to cast self._options into dictionary

        Returns dict with argument key : value pairings
        """
        args = {}
        args.update(convert_option_dict_to_dict(self._options))
        return args

    def _latch_file_info(self):
        """Internal function to update the dictionaries
        keeping track of input and output files
        """
        self.files.file_dict.clear()
        self.files.latch_file_info(self.args)

    def update_options(self, input_dict):
        """Update the values in self._options

        Only the first element (the value) of each option tuple is
        replaced; the second and third elements are carried over.

        Parameters
        ----------
        input_dict : dict
            Dictionary with argument key : value pairings

        Raises
        ------
        KeyError
            If a key of `input_dict` is not an existing option
        """
        for key, value in input_dict.items():
            current = self._options[key]
            self._options[key] = (value, current[1], current[2])

    def update_args(self, override_args):
        """Update the arguments used to invoke the application

        Note that this will also update the dictionary of input
        and output files.

        Parameters
        ----------
        override_args : dict
            Dictionary of arguments to override the current values
        """
        self.args = extract_arguments(override_args, self.args, self.mapping)
        self._latch_file_info()
        scratch_dir = self.args.get('scratch', None)
        # The literal string 'None' is treated as "not set".
        if scratch_dir is not None and scratch_dir != 'None':
            self._file_stage = FileStageManager(scratch_dir, '.')

    def get_failed_jobs(self, fail_running=False):
        """Return a dictionary with the subset of jobs that are marked as failed

        Jobs marked as partially failed are included.  If `fail_running`
        is True, jobs still marked as running are included as well.
        """
        failed_jobs = {}
        for job_key, job_details in self.jobs.items():
            if job_details.status == JobStatus.failed:
                failed_jobs[job_key] = job_details
            elif job_details.status == JobStatus.partial_failed:
                failed_jobs[job_key] = job_details
            elif fail_running and job_details.status == JobStatus.running:
                failed_jobs[job_key] = job_details
        return failed_jobs

    def check_job_status(self, key='__top__', fail_running=False):
        """Check the status of a particular job

        Returns `JobStatus.no_job` if `key` is not a registered job.
        `fail_running` is accepted but not used by this implementation.
        """
        # 'in' works on both Python 2 and 3; dict.has_key is Python 2 only.
        if key in self.jobs:
            return self.jobs[key].status
        return JobStatus.no_job

    def check_jobs_status(self, fail_running=False):
        """Check the status of all the jobs run from this link

        Returns `JobStatus.partial_failed` if some jobs failed and some
        are done, `JobStatus.failed` if some failed and none are done,
        `JobStatus.done` if every job is done, `JobStatus.running` if
        only some are done, and `JobStatus.pending` otherwise.  If
        `fail_running` is True, jobs marked as running count as failed.
        """
        n_failed = 0
        n_passed = 0
        n_total = 0
        for job_details in self.jobs.values():
            n_total += 1
            if job_details.status == JobStatus.failed:
                n_failed += 1
            elif job_details.status == JobStatus.partial_failed:
                n_failed += 1
            elif fail_running and job_details.status == JobStatus.running:
                n_failed += 1
            elif job_details.status == JobStatus.done:
                n_passed += 1

        if n_failed > 0:
            if n_passed > 0:
                return JobStatus.partial_failed
            return JobStatus.failed
        elif n_passed == n_total:
            return JobStatus.done
        elif n_passed > 0:
            return JobStatus.running
        return JobStatus.pending

    def get_jobs(self, recursive=True):
        """Return a dictionary with all the jobs

        If `recursive` is True a shallow copy of the job dictionary is
        returned, otherwise the job dictionary itself is returned.
        """
        if recursive:
            return self.jobs.copy()
        return self.jobs

    @property
    def arg_names(self):
        """Return the list of arg names """
        # Build a flat list of names rather than a list wrapping the keys.
        return list(self.args.keys())

    def update_sub_file_dict(self, sub_files):
        """Update a file dict with information from self

        `sub_files` is cleared and then refilled from the file
        dictionaries stored with each job in self.jobs.
        """
        sub_files.file_dict.clear()
        for job_details in self.jobs.values():
            if job_details.file_dict is not None:
                sub_files.update(job_details.file_dict)
            if job_details.sub_file_dict is not None:
                sub_files.update(job_details.sub_file_dict)

    def check_input_files(self, return_found=True, return_missing=True):
        """Check if input files exist.

        The input files of this link and of its sub-files are passed to
        `check_files` together with the two flags; the result of that
        call is returned unchanged.
        """
        all_input_files = self.files.chain_input_files + self.sub_files.chain_input_files
        return check_files(all_input_files, self._file_stage,
                           return_found, return_missing)

    def check_output_files(self, return_found=True, return_missing=True):
        """Check if output files exist.

        The output files of this link and of its sub-files are passed to
        `check_files` together with the two flags; the result of that
        call is returned unchanged.
        """
        all_output_files = self.files.chain_output_files + self.sub_files.chain_output_files
        return check_files(all_output_files, self._file_stage,
                           return_found, return_missing)

    def missing_input_files(self):
        """Make and return a dictionary of the missing input files.

        This returns a dictionary mapping
        filepath to list of links that use the file as input.
        """
        missing = self.check_input_files(return_found=False)
        ret_dict = {}
        for miss_file in missing:
            ret_dict[miss_file] = [self.linkname]
        return ret_dict

    def missing_output_files(self):
        """Make and return a dictionary of the missing output files.

        This returns a dictionary mapping
        filepath to list of links that produce the file as output.
        """
        missing = self.check_output_files(return_found=False)
        ret_dict = {}
        for miss_file in missing:
            ret_dict[miss_file] = [self.linkname]
        return ret_dict

    def formatted_command(self):
        """Build and return the formatted command for this `Link`.

        This is exactly the command as called from the Unix command line.
        Values containing whitespace are wrapped in double quotes.
        """
        command_template = self.command_template()
        format_dict = self.args.copy()
        for key, value in self.args.items():
            # protect whitespace
            if isinstance(value, list) and key == 'args':
                # Join the items of this value (each preceded by a blank)
                # into a single quoted string.
                outstr = "".join(' %s' % lval for lval in value)
                format_dict[key] = '"%s"' % outstr
            elif isinstance(value, str) and value.find(' ') >= 0 and key != 'args':
                format_dict[key] = '"%s"' % value
        return command_template.format(**format_dict)

    def make_argv(self):
        """Generate the vector of arguments for this `Link`.

        This is the formatted command split on whitespace, with the
        leading application name dropped.
        """
        command = self.formatted_command()
        tokens = command.split()
        return tokens[1:]

    def pre_run_checks(self, stream=sys.stdout, dry_run=False):
        """Do some checks before running this link

        This checks if input and output files are present.

        If input files are missing this will raise `OSError` if dry_run is False
        If all output files are present this is reported on `stream`; the
        link is not skipped in that case.

        Parameters
        ----------
        stream : `file`
            Must have 'write' function
        dry_run : bool
            Print command but do not run it

        Returns bool
            True if it is ok to proceed with running the link; this
            implementation returns True whenever it does not raise
        """
        input_missing = self.check_input_files(return_found=False)
        if len(input_missing) != 0:
            if dry_run:
                stream.write("Input files are missing: %s: %i\n" %
                             (self.linkname, len(input_missing)))
            else:
                raise OSError("Input files are missing: %s" % input_missing)

        output_found, output_missing = self.check_output_files()
        if len(output_missing) == 0 and len(output_found) > 0:
            stream.write("All output files for %s already exist: %i %i %i\n" %
                         (self.linkname, len(output_found),
                          len(output_missing), len(self.files.output_files)))
            # Skipping the link here (returning False when not in a dry
            # run) is intentionally disabled, so existing outputs are
            # only reported.
        return True

    def set_file_stage(self, file_stage):
        """Set this link to use a `FileStageManager` to copy files
        to and from a scratch area
        """
        self._file_stage = file_stage

    def run_link(self, stream=sys.stdout, dry_run=False, stage_files=True):
        """Runs this link.

        This checks if input and output files are present.

        If input files are missing this will raise `OSError` if dry_run is False
        Execution is skipped if `pre_run_checks` returns False.

        Unless this is a dry run, the status of the job is written to
        `stream` and recorded as done once the command has been run.

        Parameters
        ----------
        stream : `file`
            Must have 'write' function
        dry_run : bool
            Print command but do not run it
        stage_files : bool
            Stage files to and from the scratch area
        """
        check_ok = self.pre_run_checks(stream, dry_run)
        if not check_ok:
            return

        if self._file_stage is not None:
            # Mapping the files also rewrites self.args to the scratch paths.
            input_file_mapping, output_file_mapping = self.map_scratch_files(
                self.files)
            if stage_files:
                self._file_stage.make_scratch_dirs(input_file_mapping, dry_run)
                self._file_stage.make_scratch_dirs(output_file_mapping, dry_run)
                self.stage_input_files(input_file_mapping, dry_run)

        self.run_command(stream, dry_run)
        if self._file_stage is not None and stage_files:
            self.stage_output_files(output_file_mapping, dry_run)
        self.finalize(dry_run)

        # A dry run leaves the job bookkeeping untouched.
        if dry_run:
            return
        self.write_status_to_log(stream, JobStatus.done)
        self.set_status_self(status=JobStatus.done)

    def run_command(self, stream=sys.stdout, dry_run=False):
        """Runs the command for this link.

        This method can be overridden by sub-classes to invoke a
        different command

        Parameters
        ----------
        stream : `file`
            Must have 'write' function
        dry_run : bool
            Print command but do not run it
        """
        command = self.formatted_command()
        if dry_run:
            stream.write("%s\n" % command)
            stream.flush()
        else:
            print(command)
            os.system(command)

    def register_self(self, logfile, key="__top__", status=JobStatus.unknown):
        """Record this link as a job in self.jobs

        If a job is already registered under `key` only its status is
        updated; otherwise a new job is registered using the current
        arguments of this link.
        """
        if key in self.jobs:
            self.jobs[key].status = status
        else:
            self.register_job(key, self.args, logfile, status)

    def archive_self(self, logfile, key="__top__", status=JobStatus.unknown):
        """Write this link to the job archive

        The link is always registered in self.jobs; the archive is only
        updated if one was given at construction.
        """
        self.register_self(logfile, key, status)
        if self._job_archive is None:
            return
        self._job_archive.register_jobs(self.get_jobs())

    def set_status_self(self, key="__top__", status=JobStatus.unknown):
        """Set the status of this job

        If no job is registered under `key` yet, one is registered with
        the placeholder log file name 'dummy.log'.
        """
        if key in self.jobs:
            self.jobs[key].status = status
            if self._job_archive:
                self._job_archive.register_job(self.jobs[key])
        else:
            self.register_self('dummy.log', key, status)

    def write_status_to_log(self, stream=sys.stdout, status=JobStatus.unknown):
        """Write the status of this job to a log stream

        A timestamp line is written first, followed by a line with the
        status string ("No Job" for `JobStatus.no_job`).
        """
        stream.write("Timestamp: %i\n" % get_timestamp())
        if status == JobStatus.no_job:
            stream.write("No Job\n")
        else:
            stream.write("%s\n" % JOB_STATUS_STRINGS[status])

    def run(self, stream=sys.stdout, dry_run=False, stage_files=True):
        """Runs this link.

        This version is intended to be overwritten by sub-classes so
        as to provide a single function that behaves the same
        for all version of `Link`

        Parameters
        ----------
        stream : `file`
            Must have 'write' function
        dry_run : bool
            Print command but do not run it
        stage_files : bool
            Stage files to and from the scratch area
        """
        self.run_link(stream, dry_run, stage_files)

    def command_template(self):
        """Build and return a string that can be used as a template invoking
        this chain from the command line.

        The actual command can be obtained by using
        `self.command_template().format(**self.args)`
        """
        com_out = self.appname
        arg_string = ""
        flag_string = ""
        # Loop over the key, value pairs in self.args
        for key, val in self.args.items():
            # Fall back on the default stored in self._options when the
            # argument is unset
            if val is None:
                opt_val = self._options[key][0]
            else:
                opt_val = val
            opt_type = self._options[key][2]
            if key == 'args':
                # 'args' is special, pull it out and move it to the back
                arg_string += ' {%s}' % key
            elif opt_type is bool:
                # Booleans become bare flags, emitted only when true
                if opt_val:
                    flag_string += ' --%s' % (key)
            elif opt_type is list:
                # List options are written out literally as positional values
                if opt_val is None:
                    continue
                elif isinstance(opt_val, str):
                    arg_string += ' %s' % opt_val
                elif isinstance(opt_val, list):
                    for arg_val in opt_val:
                        arg_string += ' %s' % arg_val
            else:
                com_out += ' --%s {%s}' % (key, key)
        # Order is: keyword options, then flags, then positional values
        com_out += flag_string
        com_out += arg_string
        return com_out

    def run_argparser(self, argv):
        """Initialize a link with a set of arguments using an
        `argparse.ArgumentParser`

        Raises `ValueError` if the link was built without a parser.
        Returns the namespace produced by the parser.
        """
        if self._parser is None:
            raise ValueError('Link was not given a parser on initialization')
        args = self._parser.parse_args(argv)
        self.update_args(args.__dict__)
        return args

    def fill_argparser(self, parser):
        """Fill an `argparse.ArgumentParser` with the options from this chain
        """
        for key, val in self._options.items():
            add_argument(parser, key, val)

    def create_job_details(self, key, job_config, logfile, status):
        """Create a `JobDetails` for a single job

        Note that this updates the arguments of this link with `job_config`
        before taking copies of the file dictionaries.

        Parameters
        ----------
        key : str
            Key used to identify this particular job
        job_config : dict
            Dictionary with arguments passed to this particular job
        logfile : str
            Name of the associated log file
        status : int
            Current status of the job

        Returns `JobDetails`
        """
        self.update_args(job_config)
        job_details = JobDetails(jobname=self.linkname,
                                 jobkey=key,
                                 appname=self.appname,
                                 logfile=logfile,
                                 job_config=job_config,
                                 timestamp=get_timestamp(),
                                 file_dict=copy.deepcopy(self.files),
                                 sub_file_dict=copy.deepcopy(self.sub_files),
                                 status=status)
        return job_details

    def register_job(self, key, job_config, logfile, status):
        """Create a `JobDetails` for this link
        and add it to the self.jobs dictionary under `key`.

        Parameters
        ----------
        key : str
            Key used to identify this particular job
        job_config : dict
            Dictionary with arguments passed to this particular job
        logfile : str
            Name of the associated log file
        status : int
            Current status of the job

        Returns `JobDetails`
        """
        job_details = self.create_job_details(key, job_config, logfile, status)
        self.jobs[key] = job_details
        return job_details

    def map_scratch_files(self, file_dict):
        """Build and return the mapping for copying files to and from scratch area

        Returns a pair (input_file_mapping, output_file_mapping); both are
        empty when no file stage manager is set.  The arguments of this
        link are adjusted according to the mappings as a side effect.
        """
        if self._file_stage is None:
            return ({}, {})
        input_files = file_dict.input_files_to_stage
        output_files = file_dict.output_files_to_stage
        input_file_mapping = self._file_stage.map_files(input_files)
        output_file_mapping = self._file_stage.map_files(output_files)
        self.update_file_args(input_file_mapping)
        self.update_file_args(output_file_mapping)
        return input_file_mapping, output_file_mapping

    def update_file_args(self, file_mapping):
        """Adjust the arguments to deal with staging files to the scratch area

        Every argument whose value is a key of `file_mapping` is replaced
        by the mapped value.
        """
        for key, value in list(self.args.items()):
            try:
                new_value = file_mapping.get(value, value)
            except TypeError:
                # Unhashable values (e.g. list-valued options) can not be
                # keys of the mapping, so there is nothing to replace.
                continue
            if new_value != value:
                self.args[key] = new_value

    def stage_input_files(self, file_mapping, dry_run=True):
        """Stage the input files to the scratch area"""
        if self._file_stage is None:
            return
        self._file_stage.copy_to_scratch(file_mapping, dry_run)

    def stage_output_files(self, file_mapping, dry_run=True):
        """Stage the output files back from the scratch area"""
        if self._file_stage is None:
            return
        self._file_stage.copy_from_scratch(file_mapping, dry_run)

    def finalize(self, dry_run=False):
        """Remove / compress files as requested

        With dry_run set, removals are only printed and compression is
        skipped silently.
        """
        for rmfile in self.files.temp_files:
            if dry_run:
                print("remove %s" % rmfile)
            else:
                os.remove(rmfile)
        for gzfile in self.files.gzip_files:
            if not dry_run:
                os.system('gzip -9 %s' % gzfile)