def __init__(self,
             query_path,                 # The input sequences
             db_path    = pfam.hmm_db,   # The database to search
             seq_type   = 'prot',        # The seq type of the query_path file: 'prot' or 'nucl'
             e_value    = 0.001,         # The search threshold
             params     = None,          # Add extra params for the command line
             out_path   = None,          # Where the results will be dropped
             executable = None,          # If you want a specific binary give the path
             cpus       = None):         # The number of threads to use
    # Save attributes #
    self.query      = FASTA(query_path)
    self.db         = FilePath(db_path)
    self.params     = params if params else {}
    self.e_value    = e_value
    self.seq_type   = seq_type
    self.executable = FilePath(executable)
    # Cores to use #
    if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
    else:            self.cpus = cpus
    # Auto detect database short name #
    if db_path == 'pfam':    self.db = pfam.hmm_db
    if db_path == 'tigrfam': self.db = tigrfam.hmm_db
    # Output #
    if out_path is None:
        self.out_path = FilePath(self.query.prefix_path + '.hmmout')
    elif out_path.endswith('/'):
        self.out_path = FilePath(out_path + self.query.prefix + '.hmmout')
    else:
        self.out_path = FilePath(out_path)
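# A minimal usage sketch for the constructor above, assuming it belongs to an
# HMMER search wrapper class (here called `SeqSearchHMM`, a hypothetical name):
#
#     search = SeqSearchHMM('proteins.fasta',  # the query sequences
#                           db_path = 'pfam',  # resolved to pfam.hmm_db
#                           e_value = 1e-5,
#                           cpus    = 4)
#     print(search.out_path)                   # -> 'proteins.hmmout'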
def save(self, **kw):
    # Load #
    df = self.df.copy()
    # Modify the index name #
    if self.capital_index and df.index.name is not None:
        df.index.name = df.index.name.capitalize()
    # Modify column names #
    if self.upper_columns: df.columns = df.columns.str.upper()
    # Possibility to overwrite path #
    if 'path' in kw: path = FilePath(kw['path'])
    else:            path = self.path
    # Special cases for float formatting #
    if self.float_format_tex == 'split_thousands':
        self.float_format_tex = self.split_thousands
    # Make sure the directory exists #
    self.base_dir.create_if_not_exists()
    # Latex version #
    if 'tex' in self.formats:
        df.to_latex(str(path),
                    float_format  = self.float_format_tex,
                    na_rep        = self.na_rep,
                    index         = self.index,
                    bold_rows     = self.bold_rows,
                    column_format = self.column_format,
                    escape        = self.escape_tex)
    # CSV version (plain text) #
    if 'csv' in self.formats:
        path = path.replace_extension('csv')
        df.to_csv(str(path),
                  float_format = self.float_format_csv,
                  index        = self.index)
    # Return the path #
    return path
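# A minimal usage sketch for `save()` above, assuming `table` is an instance
# of the owning class with its formatting attributes set (names follow the
# attributes used in the method; the values are hypothetical):
#
#     table.formats = ('tex', 'csv')
#     table.save()                         # writes both the .tex and the .csv
#     table.save(path='~/out/table.tex')   # overrides the destination path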
def files_to_retrieve(self):
    """The files we want to download with their destinations."""
    result = OrderedDict()
    result[self.base_url + "protein.sequences.v9.1.fa.gz"] = \
        FilePath(self.p.raw_proteins)
    result[self.base_url + "COG.mappings.v9.1.txt.gz"] = \
        FilePath(self.p.raw_mappings)
    return result
def overwrite_cache(self, value):
    # Where should we look in the file system? #
    # Note: `f` is not defined here, so it must be the decorated function,
    # captured from the enclosing decorator scope #
    if 'cache_dir' in self.__dict__:
        path = FilePath(self.__dict__['cache_dir'] + f.func_name + '.pickle')
    else:
        path = getattr(self.p, f.func_name)
    # Setting to None deletes the cached pickle, anything else is forbidden #
    if value is None: path.remove()
    else: raise Exception("You can't set a pickled property, "
                          "you can only delete it")
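# A hedged sketch of the setter's behavior above, assuming it backs a
# pickle-cached property (the object `sample` and the property `stats` are
# hypothetical names):
#
#     sample.stats = None   # removes the '<cache_dir>/stats.pickle' file
#     sample.stats = 42     # raises: you can only delete, never set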
def files_to_retrieve(self):
    """The files we want to download with their destinations."""
    # Case one: a glob pattern was specified #
    if hasattr(self, "pattern"):
        files = self.ftp.listdir(self.ftp.curdir)
        files.sort(key=natural_sort)
        return OrderedDict((f, FilePath(self.autopaths.raw_dir + f))
                           for f in files if fnmatch.fnmatch(f, self.pattern))
    # Case two: an explicit list of files was specified #
    if hasattr(self, "files"):
        return OrderedDict((f, FilePath(self.autopaths.raw_dir + f))
                           for f in self.files)
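# The method above expects the owning class (or a subclass) to declare either
# a glob `pattern` or an explicit `files` list. A hedged sketch of what such
# a declaration could look like (class name and values are hypothetical):
#
#     class SomeFtpDatabase(BaseDatabase):
#         pattern = "*.fasta.gz"   # download everything matching this glob
#         # or, alternatively, an explicit list:
#         # files = ["part_1.fasta.gz", "part_2.fasta.gz"]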
def __init__(self, data_dir=None):
    # The directory that contains all databases #
    if data_dir is None: data_dir = home + 'databases/'
    # Base directory for paths #
    self.base_dir  = data_dir + self.short_name + '/'
    self.autopaths = AutoPaths(self.base_dir, self.all_paths)
    # Location of zip file remotely #
    self.url = self.base_url + self.base_name + ".tgz"
    # Location of zip file locally #
    self.dest = self.autopaths.tgz
    # The results after download #
    prefix = self.base_dir + self.base_name + '/' + self.base_name
    self.alignment = FilePath(prefix + ".fasta")
    self.taxonomy  = FilePath(prefix + ".tax")
def check_setup_py(path_of_setup):
    """
    Parses the required modules from a `setup.py` file and checks they are
    importable and have the minimum required version installed.

    Some ideas for extracting dependency information from a `setup.py` file:
    https://stackoverflow.com/questions/24236266/

    Instead, let's try the `parsesetup` package.
    Note: the code in the setup.py will be evaluated.

    Other interesting projects:
    https://pypi.org/project/requirements-parser/

    Typically you can use this function like this:

        >>> from plumbing.dependencies import check_setup_py
        >>> check_setup_py('~/module_name/setup.py')
    """
    # First let's check we have that module #
    check_module('parsesetup')
    import parsesetup
    # Make it a file path #
    from autopaths.file_path import FilePath
    path_of_setup = FilePath(path_of_setup)
    # Run it (this executes the code in the setup.py) #
    setup_args = parsesetup.parse_setup(path_of_setup, trusted=True)
    requires   = setup_args.get('install_requires', [])
    # Parse every requirement string into a (package, version) pair #
    requires = [re.split(r'==|>=', req) for req in requires]
    requires = [req if len(req) == 2 else (req[0], None) for req in requires]
    requires = dict(requires)
    # Loop #
    for package, version in requires.items(): check_module(package, version)
def build_tree_raxml(self,
                     new_path    = None,
                     seq_type    = 'nucl',   # Either 'nucl' or 'prot'
                     num_threads = None,
                     free_cores  = 2,
                     keep_dir    = False):
    """Make a tree with RAxML."""
    # Check output #
    if new_path is None: new_path = self.prefix_path + '.tree'
    # What model to choose #
    if seq_type == 'nucl': model = "GTRGAMMA"
    if seq_type == 'prot': model = "PROTGAMMAJTTF"
    # Threads #
    if num_threads is None:
        num_threads = multiprocessing.cpu_count() - free_cores
    else:
        num_threads = int(num_threads) - free_cores
    num_threads = max(1, num_threads)
    # Run it #
    temp_dir = new_temp_dir()
    sh.raxml811('-m', model, "-T", num_threads, '-p', 1, '-s', self.path,
                '-n', 'tree', '-w', temp_dir, '-f', 'a', '-x', 1,
                '-N', 'autoMR')
    # Move into place #
    if keep_dir:
        shutil.rmtree(new_path)
        shutil.move(temp_dir, new_path)
    if not keep_dir:
        shutil.move(temp_dir + 'RAxML_bestTree.tree', new_path)
    # Return #
    return FilePath(new_path)
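# A minimal usage sketch for `build_tree_raxml()` above, assuming `aln` is an
# instance of the owning alignment class (a hypothetical name):
#
#     tree = aln.build_tree_raxml(seq_type='prot', num_threads=8)
#     print(tree)   # a FilePath such as '<alignment_prefix>.tree'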
def set_defaults(self):
    """
    This method will replace empty attributes
    with defaults when this is needed.
    """
    # In case we got a special object, just use the blast_db attribute #
    if self.algorithm == 'blast' and hasattr(self.database, 'blast_db'):
        self.database = self.database.blast_db
    if self.algorithm == 'vsearch' and hasattr(self.database, 'vsearch_db'):
        self.database = self.database.vsearch_db
    # Otherwise in case we got a path, convert it to a BLASTdb #
    if self.algorithm == 'blast' and not isinstance(self.database, BLASTdb):
        self.database = BLASTdb(self.database)
    # The filtering options #
    if self.filtering is None: self.filtering = {}
    # Output path default value #
    if self.out_path is None:
        self.out_path = self.input_fasta.prefix_path + '.' + \
                        self.algorithm + 'out'
    # Output path setting #
    self.out_path = FilePath(self.out_path)
    # Number of cores default value #
    if self.num_threads is None or self.num_threads is True:
        self.num_threads = min(multiprocessing.cpu_count(), 32)
    # Extra params to be given to the search algorithm #
    if self.params is None: self.params = {}
def index_bowtie(self):
    """Create an index on the fasta file compatible with bowtie2."""
    # It returns exit code 1 if the fasta is empty #
    assert self
    # Call the bowtie executable #
    sh.bowtie2_build(self.path, self.path)
    return FilePath(self.path + '.1.bt2')
def to_qual(self, path, verbose=False):
    # Select verbosity #
    import tqdm
    wrapper = tqdm.tqdm if verbose else lambda x: x
    # Do it #
    with open(path, 'w') as handle:
        for r in wrapper(self): SeqIO.write(r, handle, 'qual')
    # Return #
    return FilePath(path)
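# A minimal usage sketch for `to_qual()` above, assuming `reads` is an
# instance of the owning class, i.e. an iterable of Biopython SeqRecord
# objects (the variable name is hypothetical):
#
#     qual = reads.to_qual('reads.qual', verbose=True)   # with progress bar
#     print(qual)                                        # -> 'reads.qual'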
def __init__(self, data_dir=None):
    # The directory that contains all databases #
    if data_dir is None: data_dir = home + 'databases/'
    # Base directory for paths #
    self.base_dir  = DirectoryPath(data_dir + self.short_name + '/')
    self.autopaths = AutoPaths(self.base_dir, self.all_paths)
    # Location of the zip files remotely #
    self.ref_url = self.base_url + "gg_13_8_99.refalign.tgz"
    self.tax_url = self.base_url + "gg_13_8_99.taxonomy.tgz"
    # Location of the zip files locally #
    self.ref_dest = self.autopaths.alignment
    self.tax_dest = self.autopaths.taxonomy
    # The results after download #
    self.alignment = self.base_dir + "gg_13_8_99.refalign"
    self.taxonomy  = self.base_dir + "gg_13_8_99.gg.tax"
    # Make them FilePath objects #
    self.alignment = FilePath(self.alignment)
    self.taxonomy  = FilePath(self.taxonomy)
def __init__(self, data_dir=None):
    # The directory that contains all databases #
    if data_dir is None: data_dir = home + 'databases/'
    # Base directory for paths #
    self.base_dir  = data_dir + self.short_name + '/'
    self.autopaths = AutoPaths(self.base_dir, self.all_paths)
    # Location of zip file remotely #
    self.url = self.base_url + "silva.nr_v%s.tgz" % self.version
    # Location of zip file locally #
    self.dest = self.autopaths.tgz
    # The results after download #
    self.alignment = self.base_dir + "silva.nr_v%s.align"
    self.taxonomy  = self.base_dir + "silva.nr_v%s.tax"
    # Make them FilePath objects #
    self.alignment = FilePath(self.alignment % self.version)
    self.taxonomy  = FilePath(self.taxonomy % self.version)
    # The part that mothur will use for naming files #
    self.nickname = "nr_v%s" % self.version
def __init__(self,
             query_path,               # The input sequences
             db_path,                  # The database to search against
             seq_type   = 'prot',      # The seq type of the query_path file: 'prot' or 'nucl'
             params     = None,        # Add extra params for the command line
             algorithm  = "blastn",    # "blastn" or "blastp"; will be auto-determined with seq_type
             out_path   = None,        # Where the results will be dropped
             executable = None,        # If you want a specific binary give the path
             cpus       = None,        # The number of threads to use
             num        = None,        # When parallelized, the number of this thread
             _out       = None,        # Store the stdout at this path
             _err       = None):       # Store the stderr at this path
    # Main input #
    self.query = FASTA(query_path)
    # The database to search against #
    self.db = FilePath(db_path)
    # Other attributes #
    self.seq_type  = seq_type
    self.algorithm = algorithm
    self.num       = num
    self.params    = params if params else {}
    # The standard output and error #
    self._out = _out
    self._err = _err
    # Output defaults #
    if out_path is None:
        self.out_path = self.query.prefix_path + self.extension
    elif out_path.endswith('/'):
        self.out_path = out_path + self.query.prefix + self.extension
    else:
        self.out_path = out_path
    # Make it a file path #
    self.out_path = FilePath(self.out_path)
    # Executable #
    self.executable = FilePath(executable)
    # Cores to use #
    if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
    else:            self.cpus = cpus
    # Save the output somewhere #
    if self._out is True: self._out = self.out_path + '.stdout'
    if self._err is True: self._err = self.out_path + '.stderr'
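# A minimal usage sketch for the constructor above (the class name
# `BLASTquery` is an assumption). Note how `out_path` is resolved: None
# places the result next to the query, a trailing slash means a directory:
#
#     query = BLASTquery('genes.fasta', 'nt.db',
#                        algorithm = 'blastn',
#                        out_path  = '~/results/',  # -> '~/results/genes.<ext>'
#                        _out      = True)          # stdout saved alongside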
def __init__(self, version, base_dir=None):
    # Attributes #
    self.version    = version
    self.short_name = self.short_name + "_" + self.version
    # Base directory #
    if base_dir is None: base_dir = home
    self.base_dir = base_dir + 'databases/' + self.short_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # URL #
    self.url = self.base_url + self.version
    # The archive #
    self.dest = self.p.archive
    # The results #
    self.alignment = FilePath(self.base_dir +
                              "pr_two.gb203_v%s.align" % self.version)
    self.taxonomy  = FilePath(self.base_dir +
                              "pr_two.gb203_v%s.tax" % self.version)
    # The part that mothur will use for naming files #
    self.nickname = "gb203_v%s" % self.version
def build_tree_fast(self, new_path=None, seq_type='nucl'):
    """Make a tree with FastTree. Names will be truncated however.
    The `seq_type` is either 'nucl' or 'prot'."""
    # Check output #
    if new_path is None: new_path = self.prefix_path + '.tree'
    # Command #
    command_args = []
    if seq_type == 'nucl': command_args += ['-nt']
    command_args += ['-gamma']
    command_args += ['-out', new_path]
    command_args += [self.path]
    # Run it #
    sh.FastTree(*command_args)
    # Return #
    return FilePath(new_path)
def mappings(self):
    """The COG mappings."""
    return FilePath(self.p.unzipped_mappings)
def save_plot(self, fig=None, axes=None, **kwargs):
    # Missing figure #
    if fig is None: fig = pyplot.gcf()
    # Missing axes #
    if axes is None: axes = pyplot.gca()
    # Parameters #
    self.params = {}
    for key in self.default_params:
        if key in kwargs:        self.params[key] = kwargs[key]
        elif hasattr(self, key): self.params[key] = getattr(self, key)
        elif self.default_params[key] is not None:
            self.params[key] = self.default_params[key]
    # Backwards compatibility #
    if kwargs.get('x_log', False): self.params['x_scale'] = 'symlog'
    if kwargs.get('y_log', False): self.params['y_scale'] = 'symlog'
    # Log #
    if 'x_scale' in self.params: axes.set_xscale(self.params['x_scale'])
    if 'y_scale' in self.params: axes.set_yscale(self.params['y_scale'])
    # Axis limits #
    if 'x_min' in self.params:
        axes.set_xlim(self.params['x_min'], axes.get_xlim()[1])
    if 'x_max' in self.params:
        axes.set_xlim(axes.get_xlim()[0], self.params['x_max'])
    if 'y_min' in self.params:
        axes.set_ylim(self.params['y_min'], axes.get_ylim()[1])
    if 'y_max' in self.params:
        axes.set_ylim(axes.get_ylim()[0], self.params['y_max'])
    # Minimum delta on axis limits #
    if 'y_lim_min' in self.params:
        top, bottom = axes.get_ylim()
        minimum     = self.params['y_lim_min']
        delta       = top - bottom
        if delta < minimum:
            center = bottom + delta/2
            axes.set_ylim(center - minimum/2, center + minimum/2)
    # Title #
    title = self.params.get('title', False)
    if title: axes.set_title(title)
    # Axes labels #
    if self.params.get('x_label'): axes.set_xlabel(self.params['x_label'])
    if self.params.get('y_label'): axes.set_ylabel(self.params['y_label'])
    # Set height and width #
    if self.params.get('width'):  fig.set_figwidth(self.params['width'])
    if self.params.get('height'): fig.set_figheight(self.params['height'])
    # Adjust #
    if self.params.get('bottom'):
        fig.subplots_adjust(hspace = 0.0,
                            bottom = self.params['bottom'],
                            top    = self.params['top'],
                            left   = self.params['left'],
                            right  = self.params['right'])
    # Grid #
    if 'x_grid' in self.params: axes.xaxis.grid(self.params['x_grid'])
    if 'y_grid' in self.params: axes.yaxis.grid(self.params['y_grid'])
    # Data and source extra text #
    if hasattr(self, 'dev_mode') and self.dev_mode is True:
        fig.text(0.99, 0.98, time.asctime(), horizontalalignment='right')
        job_name = os.environ.get('SLURM_JOB_NAME', 'Unnamed')
        user_msg = 'user: %s, job: %s' % (getpass.getuser(), job_name)
        fig.text(0.01, 0.98, user_msg, horizontalalignment='left')
    # Nice digit grouping #
    if 'x' in self.params['sep']:
        separate = lambda x, pos: split_thousands(x)
        axes.xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(separate))
    if 'y' in self.params['sep']:
        separate = lambda y, pos: split_thousands(y)
        axes.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(separate))
    # Add custom labels #
    if 'x_labels' in self.params:
        axes.set_xticklabels(self.params['x_labels'])
    if 'x_labels_rot' in self.params:
        pyplot.setp(axes.xaxis.get_majorticklabels(),
                    rotation=self.params['x_labels_rot'])
    # Possibility to overwrite path #
    if 'path' in self.params:   path = FilePath(self.params['path'])
    elif hasattr(self, 'path'): path = FilePath(self.path)
    else:                       path = FilePath(self.short_name + '.pdf')
    # Save it as different formats #
    for ext in self.params['formats']:
        fig.savefig(path.replace_extension(ext))
    # Close it #
    pyplot.close(fig)
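# A minimal usage sketch for `save_plot()` above, assuming `graph` is an
# instance whose `default_params` define keys such as 'formats' and 'sep'
# (all values here are hypothetical):
#
#     graph.save_plot(title   = 'Sequence lengths',
#                     x_label = 'Length (bp)',
#                     y_scale = 'symlog',
#                     formats = ('pdf', 'png'),
#                     path    = '~/plots/lengths.pdf')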
ipython -i -- ~/deploy/plumbing/tests/database/access_db/test_copy_table.py
"""

# Built-in modules #
import inspect, os

# Internal modules #
from plumbing.databases.access_database import AccessDatabase
from autopaths.file_path import FilePath

# Third party modules #
import pandas

# Constants #
file_name = inspect.getframeinfo(inspect.currentframe()).filename
this_dir  = os.path.dirname(os.path.abspath(file_name)) + '/'

# Never modify the original #
orig_path    = FilePath(this_dir + 'orig.mdb')
testing_path = FilePath(this_dir + 'testing.mdb')
orig_path.copy(testing_path)

# The source database #
source_db = AccessDatabase(testing_path)

# The destination database #
dest_path = FilePath(this_dir + 'copied_table.mdb')
dest_db   = AccessDatabase.create(dest_path)

# Copy a table #
dest_db.import_table(source_db, "tblClassifierSets")
def __init__(self, path):
    # Call the parent constructor #
    FilePath.__init__(self, path)
    # The path of the corresponding FASTA file #
    self.fasta_path = self.replace_extension('fasta')
Or in the Ubuntu WSL:

ipython -i -- ~/deploy/plumbing/tests/database/access_db/test_conversion.py
"""

# Built-in modules #
import inspect, os

# Internal modules #
from plumbing.databases.access_database import AccessDatabase
from autopaths.file_path import FilePath

# Third party modules #
import pandas

# Constants #
file_name = inspect.getframeinfo(inspect.currentframe()).filename
this_dir  = os.path.dirname(os.path.abspath(file_name)) + '/'

# Never modify the original #
orig_db    = FilePath(this_dir + 'orig.mdb')
testing_db = FilePath(this_dir + 'testing.mdb')
orig_db.copy(testing_db)

# The database #
db = AccessDatabase(testing_db)

# Convert #
db.convert_to_sqlite()
class JobSLURM(object):
    """Makes launching SLURM jobs easy to write and easy to use. Here are
    some examples on how to use this class:

        for command in ['print "hi"', 'print "hello"']:
            job = JobSLURM(command, time='00:01:00', qos='short')
            job.run()

        for path in ['~/data/scaffolds1.txt', '~/data/scaffolds2.txt',
                     '~/data/scaffolds3.txt']:
            command  = ['import sh\n']
            command += ['script = sh.Command("analyze.py")\n']
            command += ['script(%s)' % path]
            job = JobSLURM(command, time='00:01:00', qos='short',
                           job_name=path[-25:])
            job.run()
            print "Job %i is running !" % job.id

    Then you can easily check the status of your job and the standard out
    that is associated:

        print job.status
        print job.log_tail
        print job.info['time_left']

    etc.
    """

    extensions = {'bash':   "sh",
                  'python': "py"}

    shebang_headers = {
        'bash':   ["#!/bin/bash -le"],   # As a login shell and stop on error
        'python': ["#!/usr/bin/env python"],
    }

    slurm_headers = OrderedDict((
        ('job_name',    {'tag': '#SBATCH -J %s',              'needed': True}),
        ('change_dir',  {'tag': '#SBATCH -D %s',              'needed': True,
                         'default': os.path.abspath(os.getcwd())}),
        ('out_file',    {'tag': '#SBATCH -o %s',              'needed': True,
                         'default': '/dev/null'}),
        ('project',     {'tag': '#SBATCH -A %s',              'needed': False,
                         'default': 'b2011035'}),
        ('time',        {'tag': '#SBATCH -t %s',              'needed': True,
                         'default': '7-00:00:00'}),
        ('machines',    {'tag': '#SBATCH -N %s',              'needed': True,
                         'default': '1'}),
        ('cores',       {'tag': '#SBATCH -n %s',              'needed': True,
                         'default': num_processors}),
        ('partition',   {'tag': '#SBATCH -p %s',              'needed': True,
                         'default': 'node'}),
        ('email',       {'tag': '#SBATCH --mail-user %s',     'needed': False,
                         'default': os.environ.get('EMAIL')}),
        ('email-when',  {'tag': '#SBATCH --mail-type=%s',     'needed': True,
                         'default': 'END'}),
        ('qos',         {'tag': '#SBATCH --qos=%s',           'needed': False,
                         'default': 'short'}),
        ('dependency',  {'tag': '#SBATCH -d %s',              'needed': False,
                         'default': 'afterok:1'}),
        ('constraint',  {'tag': '#SBATCH -C %s',              'needed': False,
                         'default': 'fat'}),
        ('cluster',     {'tag': '#SBATCH -M %s',              'needed': False,
                         'default': 'milou'}),
        ('alloc',       {'tag': '#SBATCH --reservation=%s',   'needed': False,
                         'default': 'workstation'}),
        ('jobid',       {'tag': '#SBATCH --jobid=%i',         'needed': False,
                         'default': 2173455}),
        ('memory',      {'tag': '#SBATCH --mem=%i',           'needed': False,
                         'default': 120000}),
        ('mem_per_cpu', {'tag': '#SBATCH --mem-per-cpu=%i',   'needed': False,
                         'default': 512}),
        ('threads',     {'tag': '#SBATCH --cpus-per-task=%i', 'needed': False,
                         'default': num_processors}),
    ))

    script_headers = {
        'bash':   ['echo "SLURM: start at $(date) on $(hostname)"'],
        'python': ['import dateutil.tz, datetime, platform',
                   'now = datetime.datetime.now(dateutil.tz.tzlocal())',
                   r'now = now.strftime("%Y-%m-%d %Hh%Mm%Ss %Z%z")',
                   'node = platform.node()',
                   'print "SLURM: start at {0} on {1}".format(now, node)'],
    }

    script_footers = {
        'bash':   ['echo "SLURM: end at $(date)"'],
        'python': ['now = datetime.datetime.now(dateutil.tz.tzlocal())',
                   r'now = now.strftime("%Y-%m-%d %Hh%Mm%Ss %Z%z")',
                   'print "SLURM: end at {0}".format(now)'],
    }

    def __repr__(self):
        return '<%s object "%s">' % (self.__class__.__name__, self.name)

    @property
    def name(self): return self.kwargs['job_name']

    def __init__(self,
                 command     = ["print 'Hello world'"],
                 language    = 'python',
                 base_dir    = None,
                 script_path = None,
                 **kwargs):
        # Required attributes #
        self.command  = command
        self.language = language
        self.kwargs   = kwargs
        # Set the file paths #
        self.set_paths(base_dir, script_path)
        # Check command type #
        if not isinstance(self.command, list): self.command = [self.command]
        # Get the name #
        if 'job_name' not in self.kwargs:
            hashed  = hashlib.md5(''.join(self.command)).digest()
            encoded = base64.urlsafe_b64encode(hashed)
            self.kwargs['job_name'] = encoded
        # Check we have a project, otherwise choose the one with the fewest hours #
        if hostname.startswith('milou'):
            if 'project' not in self.kwargs and \
               'SBATCH_ACCOUNT' not in os.environ:
                if projects: self.kwargs['project'] = projects[0]['name']

    def set_paths(self, base_dir, script_path):
        """Set the directory, the script path and the outfile path."""
        # Make absolute paths #
        if 'change_dir' in self.kwargs:
            self.kwargs['change_dir'] = DirectoryPath(
                os.path.abspath(self.kwargs['change_dir']))
        if 'out_file' in self.kwargs:
            self.kwargs['out_file'] = FilePath(
                os.path.abspath(self.kwargs['out_file']))
        # In case there is a base directory #
        if base_dir is not None:
            self.base_dir    = DirectoryPath(os.path.abspath(base_dir))
            self.script_path = FilePath(base_dir + "run." +
                                        self.extensions[self.language])
            self.kwargs['change_dir'] = base_dir
            self.kwargs['out_file']   = FilePath(base_dir + "run.out")
        # Other cases #
        if base_dir is None and script_path is None:
            self.script_path = FilePath(new_temp_path())
        if script_path is not None:
            self.script_path = FilePath(os.path.abspath(script_path))

    @property_cached
    def slurm_params(self):
        """The list of parameters to give to the `sbatch` command."""
        # Main loop #
        result = OrderedDict()
        for param, info in self.slurm_headers.items():
            if not info['needed'] and param not in self.kwargs: continue
            if param in self.kwargs: result[param] = self.kwargs.get(param)
            else:                    result[param] = info['default']
        # Special cases #
        if result.get('cluster') == 'halvan': result['partition'] = 'halvan'
        # Return #
        return result

    @property
    def script(self):
        """The script to be submitted to the SLURM queue."""
        self.shebang_header = self.shebang_headers[self.language]
        self.slurm_header   = [self.slurm_headers[k]['tag'] % v
                               for k, v in self.slurm_params.items()]
        self.script_header  = self.script_headers[self.language]
        self.script_footer  = self.script_footers[self.language]
        return '\n'.join(flatter([self.shebang_header,
                                  self.slurm_header,
                                  self.script_header,
                                  self.command,
                                  self.script_footer]))

    def make_script(self):
        """Make the script and return a FilePath object pointing
        to the script above."""
        self.script_path.write(self.script)
        self.script_path.permissions.make_executable()
        return self.script_path

    @property
    def log(self):
        """The log as a FilePath object."""
        return self.slurm_params['out_file']

    @property
    def log_tail(self):
        """If we have a log file, what is its tail?"""
        if not self.kwargs['out_file'].exists: return False
        else: return tail(self.slurm_params['out_file'])

    @property
    def status(self):
        """What is the status of the job?"""
        # If there is no script it is either ready or a lost duplicate #
        if not self.script_path.exists:
            if self.name in jobs.names:     return "DUPLICATE"
            if self.name not in jobs.names: return "READY"
        # It is submitted already #
        if self.name in jobs.names:
            if jobs[self.name]['type'] == 'queued':  return "QUEUED"
            if jobs[self.name]['type'] == 'running': return "RUNNING"
        # So the script exists for sure, but it is not in the queue #
        if not self.kwargs['out_file'].exists: return "ABORTED"
        # Let's look in the log file #
        if 'CANCELED' in self.log_tail:          return "CANCELLED"
        if 'slurmstepd: error' in self.log_tail: return "CANCELLED"
        # It all looks good #
        if 'SLURM: end at' in self.log_tail: return "FINISHED"
        # At this point we have no idea #
        return "INTERRUPTED"

    @property
    def info(self):
        """Get the existing job information dictionary."""
dictionary""" if self.name not in jobs: return {'status': self.status} else: return jobs[self.name] #-------------------------------------------------------------------------# def run(self): """Will call self.launch() after performing some checks""" # Check already exists # if self.status == "READY": return self.launch() # Check name conflict # if self.status == "DUPLICATE": message = "Job with same name '%s' already in queue, but we lost the script." if self.status == "QUEUED": message = "Job '%s' already in queue." if self.status == "RUNNING": message = "Job '%s' already running." if self.status == "FINISHED": message = "Job '%s' already ended successfully." if self.status == "ABORTED": message = "Job '%s' was killed without any output file (?)." if self.status == "CANCELED": message = "Job '%s' was canceled or killed while running." if self.status == "INTERUPTED": message = "Job '%s' is not running. We don't know why. Look at the log file." print(Color.i_red + message % (self.name, ) + Color.end) print("Job might have run already (?). Not starting.") def launch(self): """Make the script file and return the newly created job id""" # Make script file # self.make_script() # Do it # sbatch_out = sh.sbatch(self.script_path) jobs.expire() # Message # print(Color.i_blu + "SLURM:" + Color.end + " " + str(sbatch_out), ) # Return id # self.id = int( re.findall("Submitted batch job ([0-9]+)", str(sbatch_out))[0]) return self.id def cancel(self): if self.status != "QUEUED" and self.status != "RUNNING": raise Exception("Can't cancel job '%s'" % self.name) sh.scancel(self.info['jobid']) def wait(self): """Wait until the job is finished""" pass #-------------------------------------------------------------------------# def run_locally(self): """A convenience method to run the same result as a SLURM job but locally in a non-blocking way. Useful for testing.""" self.thread = threading.Thread(target=self.execute_locally) self.thread.daemon = True # So that they die when we die self.thread.start() def execute_locally(self): """Runs the equivalent command locally in a blocking way.""" # Make script file # self.make_script() # Do it # with open(self.kwargs['out_file'], 'w') as handle: sh.python(self.script_path, _out=handle, _err=handle) def wait_locally(self): """If you have run the query in a non-blocking way, call this method to pause until the query is finished.""" try: self.thread.join( sys.maxint) # maxint timeout so that we can Ctrl-C them except KeyboardInterrupt: print("Stopped waiting on job '%s'" % self.kwargs['job_name'])
def index_samtools(self):
    """Create an index on the fasta file compatible with samtools."""
    sh.samtools('faidx', self.path)
    return FilePath(self.path + '.fai')
def new_temp_file(**kwargs):
    """A new temporary path as a FilePath object."""
    handle = tempfile.NamedTemporaryFile(delete=False, **kwargs)
    path   = handle.name
    handle.close()
    return FilePath(path)
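# A minimal usage sketch for `new_temp_file()` above; any keyword arguments
# are forwarded to `tempfile.NamedTemporaryFile`:
#
#     tmp = new_temp_file(suffix='.fasta')
#     print(tmp)   # e.g. '/tmp/tmpXXXXXXXX.fasta', as a FilePath object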
def __init__(self, parent):
    self.parent = parent
    self.path   = FilePath(self.parent.prefix_path + '_len_hist.pdf')