示例#1
0
    def __init__(
            self,
            reference_name,
            annotation_name,
            annotation_version=None,
            decompress_on_download=False,
            copy_local_files_to_cache=False,
            install_string_function=None,
            cache_directory_path=None):
        """
        Record which annotation data this object manages and work out the
        directory in which its files are cached.

        Parameters
        ----------
        reference_name : str
            Name of reference genome

        annotation_name : str
            Name of annotation database

        annotation_version : str or int, optional
            Version or release of annotation database

        decompress_on_download : bool, optional
            Whether a downloaded .fa.gz file should automatically be
            expanded into a decompressed FASTA file.

        copy_local_files_to_cache : bool, optional
            Whether a file that already lives on the local file system
            should still be copied into the cache.

        install_string_function : fn, optional
            Function which returns an error message containing install
            instructions. Without it, errors only say which data is missing.

        cache_directory_path : str, optional
            Where to place downloaded and temporary files. When omitted
            (or empty), the location is derived from the reference name,
            annotation name, annotation version and the global cache
            directory determined by datacache.
        """
        # Identity of the annotation data handled by this instance.
        self.reference_name = reference_name
        self.annotation_name = annotation_name
        self.annotation_version = annotation_version

        # No explicit location given: build one from the data's identity.
        # Note that `cache_subdirectory` is only set on this code path.
        if not cache_directory_path:
            self.cache_subdirectory = cache_subdirectory(
                reference_name=reference_name,
                annotation_name=annotation_name,
                annotation_version=annotation_version)
            cache_directory_path = datacache.get_data_dir(
                subdir=self.cache_subdirectory)

        # Stored under a private name because access through the public
        # `cache_directory_path` (no underscore) is combined with ensuring
        # that the directory actually exists.
        self._cache_directory_path = cache_directory_path

        # Behaviour switches, stored as given.
        self.decompress_on_download = decompress_on_download
        self.copy_local_files_to_cache = copy_local_files_to_cache
        self.install_string_function = install_string_function
def fm_index_path(genome):
    """
    Return the path of the cached reference-peptide ``.fm`` file for the
    given genome, creating the cache directory if it is missing.

    Parameters
    ----------
    genome : object
        Must expose ``species.latin_name`` and an integer ``release``;
        both are embedded in the file name.

    Returns
    -------
    str
        ``<cache_dir>/<latin_name>_<release>_<2 or 3>.fm``, where the last
        number is the major version of the running Python interpreter.
        NOTE(review): presumably the cached file is not interchangeable
        between Python 2 and 3 -- confirm.

    Raises
    ------
    OSError
        If the cache directory does not exist and cannot be created.
    """
    # if $VAXRANK_REF_PEPTIDES_DIR is set, that'll be the location of the cache
    cache_dir = get_data_dir(envkey='VAXRANK_REF_PEPTIDES_DIR')

    # Create first and tolerate "already exists" afterwards: checking for
    # existence before creating would race with another process making the
    # same directory in between. (`exist_ok=True` is avoided because this
    # code still supports Python 2.)
    try:
        os.makedirs(cache_dir)
    except OSError:
        if not os.path.exists(cache_dir):
            raise

    python_major = 2 if six.PY2 else 3
    file_name = '%s_%d_%d.fm' % (
        genome.species.latin_name, genome.release, python_major)
    return os.path.join(cache_dir, file_name)
示例#3
0
def make_blastdb(url, name=None, filename=None, overwrite=False):
    """Download protein sequences and make a BLAST protein database.

    Uses the datacache module for the cache directory and the download,
    and the external ``makeblastdb`` program to build the database.

    Parameters
    ----------
    url : str
        Location of the protein sequence file to download.
    name : str
        Base name of the database inside the datacache directory.
        Required, even though the signature gives it a default of None.
    filename : str, optional
        Passed through to ``datacache.fetch_file`` as the local file name.
    overwrite : bool, optional
        If false (the default) and ``<name>.phr`` already exists in the
        cache directory, the existing database is reused and nothing is
        downloaded or rebuilt.

    Returns
    -------
    str
        Path of the database (cache directory joined with ``name``),
        without any file extension.

    Raises
    ------
    TypeError
        If ``name`` is None.
    subprocess.CalledProcessError
        If ``makeblastdb`` exits with a non-zero status.
    OSError
        If the ``makeblastdb`` executable cannot be started.
    """
    # Fail early with a clear message instead of an opaque TypeError from
    # os.path.join after the cache directory has already been touched.
    if name is None:
        raise TypeError(
            "make_blastdb() requires a 'name' for the output database")

    import datacache

    cachedir = datacache.get_data_dir()
    blastdb = os.path.join(cachedir, name)

    # The .phr file is used as the marker that the database was built.
    if not overwrite and os.path.exists(blastdb + '.phr'):
        return blastdb

    filename = datacache.fetch_file(
        url, filename=filename, decompress=True, subdir=None)

    # Argument list without a shell: paths containing spaces or shell
    # metacharacters are passed to makeblastdb verbatim.
    # check_output is kept so the tool's stdout stays captured (and is
    # discarded) rather than being echoed to the console.
    subprocess.check_output([
        'makeblastdb',
        '-dbtype', 'prot',
        '-in', filename,
        '-out', blastdb,
    ])
    return blastdb
示例#4
0
    def __init__(self,
                 reference_name,
                 annotation_name,
                 annotation_version=None,
                 decompress_on_download=False,
                 copy_local_files_to_cache=False,
                 install_string_function=None,
                 cache_directory_path=None):
        """
        Record which annotation data this object manages and work out the
        directory in which its files are cached.

        Parameters
        ----------
        reference_name : str
            Name of reference genome

        annotation_name : str
            Name of annotation database

        annotation_version : str or int, optional
            Version or release of annotation database

        decompress_on_download : bool, optional
            Whether a downloaded .fa.gz file should automatically be
            expanded into a decompressed FASTA file.

        copy_local_files_to_cache : bool, optional
            Whether a file that already lives on the local file system
            should still be copied into the cache.

        install_string_function : fn, optional
            Function which returns an error message containing install
            instructions. Without it, errors only say which data is missing.

        cache_directory_path : str, optional
            Where to place downloaded and temporary files. When omitted
            (or empty), the location is derived from the reference name,
            annotation name, annotation version and the global cache
            directory determined by datacache.
        """
        # Identity of the annotation data handled by this instance.
        self.reference_name = reference_name
        self.annotation_name = annotation_name
        self.annotation_version = annotation_version

        # No explicit location given: build one from the data's identity.
        # Note that `cache_subdirectory` is only set on this code path.
        if not cache_directory_path:
            self.cache_subdirectory = cache_subdirectory(
                reference_name=reference_name,
                annotation_name=annotation_name,
                annotation_version=annotation_version)

            # If `CACHE_DIR_ENV_KEY` is set, the cache will be saved there
            cache_directory_path = datacache.get_data_dir(
                subdir=self.cache_subdirectory,
                envkey=CACHE_DIR_ENV_KEY)

        # Stored under a private name because access through the public
        # `cache_directory_path` (no underscore) is combined with ensuring
        # that the directory actually exists.
        self._cache_directory_path = cache_directory_path

        # Behaviour switches, stored as given.
        self.decompress_on_download = decompress_on_download
        self.copy_local_files_to_cache = copy_local_files_to_cache
        self.install_string_function = install_string_function