Пример #1
0
 def __init__(
     self,
     query_path,  # The input sequences
     db_path=pfam.hmm_db,  # The database to search
     seq_type='prot' or 'nucl',  # The seq type of the query_path file
     e_value=0.001,  # The search threshold
     params=None,  # Add extra params for the command line
     out_path=None,  # Where the results will be dropped
     executable=None,  # If you want a specific binary give the path
     cpus=None):  # The number of threads to use
     """
     Record everything needed to search `query_path` against an HMM
     database.

     Note that the default `'prot' or 'nucl'` evaluates to `'prot'`; the
     second string only documents the other accepted value.

     If `out_path` is None the results go next to the query file, with the
     '.hmmout' extension. If it ends with a slash it is treated as a
     directory and the query prefix plus '.hmmout' is appended. Otherwise it
     is used as given.
     """
     # The query and the simple options #
     self.query      = FASTA(query_path)
     self.seq_type   = seq_type
     self.e_value    = e_value
     self.params     = params or {}
     self.executable = FilePath(executable)
     # The database, with support for the two short names #
     if db_path == 'tigrfam': self.db = tigrfam.hmm_db
     elif db_path == 'pfam':  self.db = pfam.hmm_db
     else:                    self.db = FilePath(db_path)
     # Number of threads, capped at 32 when auto-detected #
     self.cpus = min(multiprocessing.cpu_count(), 32) if cpus is None else cpus
     # Where the output will be written #
     if out_path is None:
         destination = self.query.prefix_path + '.hmmout'
     elif out_path.endswith('/'):
         destination = out_path + self.query.prefix + '.hmmout'
     else:
         destination = out_path
     self.out_path = FilePath(destination)
Пример #2
0
 def files_to_retrieve(self):
     """
     Return an OrderedDict mapping each remote URL to download onto the
     local FilePath where it should be stored.
     """
     # Remote file name paired with its local destination #
     targets = (
         ("protein.sequences.v9.1.fa.gz", self.p.raw_proteins),
         ("COG.mappings.v9.1.txt.gz",     self.p.raw_mappings),
     )
     # Build the mapping in that same order #
     return OrderedDict((self.base_url + remote_name, FilePath(local_path))
                        for remote_name, local_path in targets)
Пример #3
0
 def files_to_retrieve(self):
     """
     Return an OrderedDict mapping each remote file name to the local
     FilePath (inside the raw directory) where it should be stored.

     When the object has a `pattern` attribute, the current FTP directory
     is listed, naturally sorted and filtered with `fnmatch`. Otherwise,
     when it has a `files` attribute, that explicit list is used. If neither
     attribute exists, None is returned.
     """
     # Case 1: a glob-like pattern is matched against the remote listing #
     if hasattr(self, "pattern"):
         listing = self.ftp.listdir(self.ftp.curdir)
         listing.sort(key=natural_sort)
         selected = OrderedDict()
         for name in listing:
             if fnmatch.fnmatch(name, self.pattern):
                 selected[name] = FilePath(self.autopaths.raw_dir + name)
         return selected
     # Case 2: an explicit list of file names #
     if hasattr(self, "files"):
         selected = OrderedDict()
         for name in self.files:
             selected[name] = FilePath(self.autopaths.raw_dir + name)
         return selected
     # Neither attribute is present #
     return None
Пример #4
0
 def __init__(self, data_dir=None):
     """
     Compute every path and URL used by this database.

     `data_dir` is the directory that contains all databases; it defaults
     to `home + 'databases/'`. String concatenation is used throughout, so
     it is expected to end with a slash.
     """
     # Default location of all databases #
     if data_dir is None:
         data_dir = home + 'databases/'
     # Our own directory and its automatic paths #
     self.base_dir  = data_dir + self.short_name + '/'
     self.autopaths = AutoPaths(self.base_dir, self.all_paths)
     # Remote archive and where it lands locally #
     self.url  = self.base_url + self.base_name + ".tgz"
     self.dest = self.autopaths.tgz
     # The two result files share a common stem #
     stem = self.base_dir + self.base_name + '/' + self.base_name
     self.alignment, self.taxonomy = (FilePath(stem + extension)
                                      for extension in (".fasta", ".tax"))
Пример #5
0
def check_setup_py(path_of_setup):
    """
    Parses the required modules from a `setup.py` file and checks they are
    importable and have the minimum required version installed.

    Some ideas for extracting dependency information from a `setup.py` file:
    https://stackoverflow.com/questions/24236266/

    Instead let's try the `parsesetup` package.
    Note: The code in the setup.py will be evaluated.

    Other interesting projects:
    https://pypi.org/project/requirements-parser/

    Only the `==` and `>=` specifiers are understood. A requirement that
    does not split into exactly a name and one version is checked for
    presence only (its version is passed as None). Whitespace around the
    name and the version is ignored, so `"numpy >= 1.0"` is treated like
    `"numpy>=1.0"`.

    Typically you can use this function like this:

        >>> from plumbing.dependencies import check_setup_py
        >>> check_setup_py('~/module_name/setup.py')
    """
    # First let's check we have that module #
    check_module('parsesetup')
    import parsesetup
    # Parse it #
    from autopaths.file_path import FilePath
    path_of_setup = FilePath(path_of_setup)
    # Run it (this evaluates the code of the setup.py) #
    setup_args = parsesetup.parse_setup(path_of_setup, trusted=True)
    # An explicit `install_requires=None` must behave like an empty list #
    requires = setup_args.get('install_requires') or []
    # Split every requirement into a package name and an optional version #
    parsed = {}
    for requirement in requires:
        # Strip the parts, otherwise spaces around the operator would end
        # up inside the package name and the version string
        parts = [part.strip() for part in re.split(r'==|>=', requirement)]
        # An empty version (e.g. "pkg>=") is treated as no version at all
        version = (parts[1] or None) if len(parts) == 2 else None
        parsed[parts[0]] = version
    # Check every package #
    for package, version in parsed.items():
        check_module(package, version)
Пример #6
0
 def build_tree_raxml(self,
                new_path    = None,
                seq_type    = 'nucl' or 'prot',
                num_threads = None,
                free_cores  = 2,
                keep_dir    = False):
     """
     Make a tree with RAxML.

     Parameters:
         new_path:    Where the result goes. Defaults to the prefix path of
                      this file with the '.tree' extension.
         seq_type:    Either 'nucl' or 'prot'; it selects the substitution
                      model. The default expression evaluates to 'nucl'.
         num_threads: Number of threads to start from. Defaults to the
                      number of CPUs on the machine.
         free_cores:  Subtracted from `num_threads` (also when it was given
                      explicitly). At least one thread is always used.
         keep_dir:    If true, the whole RAxML output directory is moved to
                      `new_path` (replacing an existing directory there).
                      Otherwise only the best tree file is moved.

     Returns a FilePath pointing to `new_path`.

     Raises a ValueError if `seq_type` is not one of the two accepted
     values.
     """
     import os
     # Check output #
     if new_path is None: new_path = self.prefix_path + '.tree'
     # What model to choose (fail early instead of on an unbound name) #
     if seq_type == 'nucl':   model = "GTRGAMMA"
     elif seq_type == 'prot': model = "PROTGAMMAJTTF"
     else:
         raise ValueError("The seq_type must be 'nucl' or 'prot', "
                          "not %r." % (seq_type,))
     # Threads #
     if num_threads is None: num_threads = multiprocessing.cpu_count() - free_cores
     else:                   num_threads = int(num_threads) - free_cores
     num_threads = max(1, num_threads)
     # Run it #
     temp_dir = new_temp_dir()
     sh.raxml811('-m', model, "-T", num_threads, '-p', 1, '-s', self.path, '-n', 'tree', '-w', temp_dir, '-f', 'a', '-x', 1, '-N', 'autoMR')
     # Move into place #
     if keep_dir:
         # Only clear the destination if a directory is already there;
         # on a first run there is nothing to remove
         if os.path.isdir(new_path): shutil.rmtree(new_path)
         shutil.move(temp_dir, new_path)
     else:
         # This concatenation relies on `temp_dir` ending with a slash
         shutil.move(temp_dir + 'RAxML_bestTree.tree', new_path)
     # Return #
     return FilePath(new_path)
Пример #7
0
 def set_defaults(self):
     """
     Fill in every attribute that was left empty with its default value,
     and normalize the database and the output path.
     """
     algo = self.algorithm
     # The database: unwrap special objects, then wrap plain paths #
     if algo == 'blast':
         if hasattr(self.database, 'blast_db'):
             self.database = self.database.blast_db
         # Anything that is still not a BLASTdb gets converted to one
         if not isinstance(self.database, BLASTdb):
             self.database = BLASTdb(self.database)
     elif algo == 'vsearch':
         if hasattr(self.database, 'vsearch_db'):
             self.database = self.database.vsearch_db
     # The filtering options #
     if self.filtering is None:
         self.filtering = {}
     # Output path: default next to the input, always a FilePath #
     destination = self.out_path
     if destination is None:
         destination = self.input_fasta.prefix_path + '.' + \
                       self.algorithm + 'out'
     self.out_path = FilePath(destination)
     # Number of cores: auto-detect (capped at 32) on None or True #
     auto_detect = self.num_threads is None or self.num_threads is True
     if auto_detect:
         self.num_threads = min(multiprocessing.cpu_count(), 32)
     # Extra params to be given to the search algorithm #
     if self.params is None:
         self.params = {}
 def index_bowtie(self):
     """
     Build a bowtie2 compatible index for this fasta file and return the
     FilePath of the resulting '.1.bt2' file.
     """
     # bowtie2-build returns exit code 1 if the fasta is empty, so we
     # refuse to continue when this object evaluates as false
     assert self
     # The fasta path doubles as the base name of the index #
     fasta_path = self.path
     sh.bowtie2_build(fasta_path, fasta_path)
     # Point to the first file of the index #
     return FilePath(fasta_path + '.1.bt2')
Пример #9
0
 def save(self, **kw):
     """
     Write the data frame to disk in every format listed in `self.formats`
     ('tex' and/or 'csv') and return the last path that was used.

     A `path` keyword argument overrides `self.path`. The CSV version is
     written to the same path with its extension replaced by 'csv', and
     that is the path returned when 'csv' is requested.
     """
     # Work on a copy so the original frame is never modified #
     frame = self.df.copy()
     # Optionally capitalize the index name #
     if self.capital_index and frame.index.name is not None:
         frame.index.name = frame.index.name.capitalize()
     # Optionally upper-case the column names #
     if self.upper_columns:
         frame.columns = frame.columns.str.upper()
     # Possibility to overwrite path #
     path = FilePath(kw['path']) if 'path' in kw else self.path
     # The special string is swapped for the actual formatting callable
     # (this is stored on the instance, as before)
     if self.float_format_tex == 'split_thousands':
         self.float_format_tex = self.split_thousands
     # Make sure the directory exists #
     self.base_dir.create_if_not_exists()
     # Latex version #
     if 'tex' in self.formats:
         latex_options = dict(float_format  = self.float_format_tex,
                              na_rep        = self.na_rep,
                              index         = self.index,
                              bold_rows     = self.bold_rows,
                              column_format = self.column_format,
                              escape        = self.escape_tex)
         frame.to_latex(str(path), **latex_options)
     # CSV version (plain text) #
     if 'csv' in self.formats:
         path = path.replace_extension('csv')
         csv_options = dict(float_format = self.float_format_csv,
                            index        = self.index)
         frame.to_csv(str(path), **csv_options)
     # Return the path #
     return path
Пример #10
0
 def __init__(self, data_dir=None):
     """
     Compute every path and URL used by this database.

     `data_dir` is the directory that contains all databases; it defaults
     to `home + 'databases/'`.
     """
     # Default location of all databases #
     if data_dir is None:
         data_dir = home + 'databases/'
     # Our own directory and its automatic paths #
     self.base_dir  = DirectoryPath(data_dir + self.short_name + '/')
     self.autopaths = AutoPaths(self.base_dir, self.all_paths)
     # The two remote archives #
     self.ref_url = self.base_url + "gg_13_8_99.refalign.tgz"
     self.tax_url = self.base_url + "gg_13_8_99.taxonomy.tgz"
     # Where the archives are stored locally #
     self.ref_dest = self.autopaths.alignment
     self.tax_dest = self.autopaths.taxonomy
     # The result files after download, directly as FilePath objects #
     self.alignment = FilePath(self.base_dir + "gg_13_8_99.refalign")
     self.taxonomy  = FilePath(self.base_dir + "gg_13_8_99.gg.tax")
Пример #11
0
 def __init__(self, data_dir=None):
     """
     Compute every path and URL used by this database; all of them embed
     `self.version`.

     `data_dir` is the directory that contains all databases; it defaults
     to `home + 'databases/'`.
     """
     # Default location of all databases #
     if data_dir is None:
         data_dir = home + 'databases/'
     # Our own directory and its automatic paths #
     self.base_dir  = data_dir + self.short_name + '/'
     self.autopaths = AutoPaths(self.base_dir, self.all_paths)
     # Remote archive and where it lands locally #
     self.url  = self.base_url + "silva.nr_v%s.tgz" % self.version
     self.dest = self.autopaths.tgz
     # The result files: the version is interpolated into the full
     # template, directory included, exactly as before
     alignment_template = self.base_dir + "silva.nr_v%s.align"
     taxonomy_template  = self.base_dir + "silva.nr_v%s.tax"
     self.alignment = FilePath(alignment_template % self.version)
     self.taxonomy  = FilePath(taxonomy_template % self.version)
     # The part that mothur will use for naming files #
     self.nickname = "nr_v%s" % self.version
 def to_qual(self, path, verbose=False):
     """
     Write every record of this object to `path` in the 'qual' format and
     return that location as a FilePath.

     With `verbose` set, the iteration is wrapped in a tqdm progress bar.
     """
     import tqdm
     with open(path, 'w') as handle:
         # Only decorate the iteration when progress is requested #
         records = tqdm.tqdm(self) if verbose else self
         for record in records:
             SeqIO.write(record, handle, 'qual')
     # Return #
     return FilePath(path)
Пример #13
0
 def __init__(self, version, base_dir=None):
     """
     Compute every path and URL used by this database for the given
     `version`.

     The instance `short_name` becomes the existing `short_name` followed
     by an underscore and the version. `base_dir` defaults to `home`, and
     'databases/<short_name>/' is appended to it.
     """
     # Attributes #
     self.version    = version
     self.short_name = self.short_name + "_" + self.version
     # Base directory #
     root = home if base_dir is None else base_dir
     self.base_dir = root + 'databases/' + self.short_name + '/'
     self.p        = AutoPaths(self.base_dir, self.all_paths)
     # Remote location and local archive #
     self.url  = self.base_url + self.version
     self.dest = self.p.archive
     # The results #
     alignment_name = "pr_two.gb203_v%s.align" % self.version
     taxonomy_name  = "pr_two.gb203_v%s.tax" % self.version
     self.alignment = FilePath(self.base_dir + alignment_name)
     self.taxonomy  = FilePath(self.base_dir + taxonomy_name)
     # The part that mothur will use for naming files #
     self.nickname = "gb203_v%s" % self.version
Пример #14
0
 def __init__(self,
              query_path,
              db_path,
              seq_type     = 'prot' or 'nucl',     # The seq type of the query_path file
              params       = None,                 # Add extra params for the command line
              algorithm    = "blastn" or "blastp", # Will be auto-determined with seq_type
              out_path     = None,                 # Where the results will be dropped
              executable   = None,                 # If you want a specific binary give the path
              cpus         = None,                 # The number of threads to use
              num          = None,                 # When parallelized, the number of this thread
              _out         = None,                 # Store the stdout at this path
              _err         = None):                # Store the stderr at this path
     """
     Record everything needed to search `query_path` against `db_path`.

     Note that the `'a' or 'b'` defaults evaluate to their first string
     ('prot' and "blastn"); the second one only documents the alternative.

     If `out_path` is None the results go next to the query file, using
     `self.extension`. If it ends with a slash it is treated as a directory
     and the query prefix plus the extension is appended. Passing True for
     `_out` or `_err` stores that stream next to the output file, with
     '.stdout' or '.stderr' appended.
     """
     # Main input and the database to search against #
     self.query = FASTA(query_path)
     self.db    = FilePath(db_path)
     # Other attributes #
     self.seq_type  = seq_type
     self.algorithm = algorithm
     self.num       = num
     self.params    = params or {}
     # The standard output and error #
     self._out = _out
     self._err = _err
     # Output defaults, always converted to a file path #
     if out_path is None:
         destination = self.query.prefix_path + self.extension
     elif out_path.endswith('/'):
         destination = out_path + self.query.prefix + self.extension
     else:
         destination = out_path
     self.out_path = FilePath(destination)
     # Executable #
     self.executable = FilePath(executable)
     # Cores to use, capped at 32 when auto-detected #
     self.cpus = min(multiprocessing.cpu_count(), 32) if cpus is None else cpus
     # Save the output somewhere #
     if self._out is True: self._out = self.out_path + '.stdout'
     if self._err is True: self._err = self.out_path + '.stderr'
Пример #15
0
 def set_paths(self, base_dir, script_path):
     """
     Set the directory, the script path and the outfile path.

     With a `base_dir`, the script becomes 'run.<extension>' and the output
     file 'run.out' inside it, and it is also used as the directory to
     change into. With neither argument, the script goes to a new temporary
     path. An explicit `script_path` always has the last word.
     """
     # Make the paths given as keyword arguments absolute #
     for key, path_type in (('change_dir', DirectoryPath),
                            ('out_file',   FilePath)):
         if key in self.kwargs:
             self.kwargs[key] = path_type(os.path.abspath(self.kwargs[key]))
     # Either derive everything from the base directory, or fall back on
     # a temporary script when nothing at all was specified
     if base_dir is not None:
         self.base_dir = DirectoryPath(os.path.abspath(base_dir))
         script_name = "run." + self.extensions[self.language]
         self.script_path = FilePath(base_dir + script_name)
         self.kwargs['change_dir'] = base_dir
         self.kwargs['out_file'] = FilePath(base_dir + "run.out")
     elif script_path is None:
         self.script_path = FilePath(new_temp_path())
     # An explicit script path overrides the choices above #
     if script_path is not None:
         self.script_path = FilePath(os.path.abspath(script_path))
Пример #16
0
 def build_tree_fast(self, new_path=None, seq_type='nucl' or 'prot'):
     """
     Make a tree with FastTree. Names will be truncated however.

     The tree is written to `new_path`, which defaults to the prefix path
     of this file with the '.tree' extension, and is returned as a
     FilePath. The '-nt' flag is only passed when `seq_type` is 'nucl'
     (which is what the default expression evaluates to).
     """
     # Check output #
     if new_path is None:
         new_path = self.prefix_path + '.tree'
     # Assemble the command line #
     command_args = ['-nt'] if seq_type == 'nucl' else []
     command_args.extend(['-gamma', '-out', new_path, self.path])
     # Run it #
     sh.FastTree(*command_args)
     # Return #
     return FilePath(new_path)
Пример #17
0
 def mappings(self):
     """Return the unzipped cog mappings as a FilePath."""
     unzipped_path = self.p.unzipped_mappings
     return FilePath(unzipped_path)
 def index_samtools(self):
     """
     Build a samtools compatible index ('faidx') for this fasta file and
     return the FilePath of the resulting '.fai' file.
     """
     fasta_path = self.path
     sh.samtools('faidx', fasta_path)
     # The index location is the fasta path with '.fai' appended #
     return FilePath(fasta_path + '.fai')
 def __init__(self, parent):
     """
     Remember the parent object and derive our PDF path from its prefix
     path, with the '_len_hist.pdf' suffix.
     """
     self.parent = parent
     pdf_location = self.parent.prefix_path + '_len_hist.pdf'
     self.path = FilePath(pdf_location)
Пример #20
0
def new_temp_file(**kwargs):
    """
    Create a new temporary file and return its location as a FilePath.

    All keyword arguments are forwarded to `tempfile.NamedTemporaryFile`.
    The file is created with `delete=False`, so it is still on disk after
    its handle is closed here.
    """
    # Leaving the context closes the handle; the file itself is kept
    with tempfile.NamedTemporaryFile(delete=False, **kwargs) as handle:
        path = handle.name
    return FilePath(path)