示例#1
0
文件: blast.py 项目: ascendo/anvio
    def __init__(self, query_fasta, run=run, progress=progress, num_threads=1, overwrite_output_destinations=False):
        """Set up a BLAST search driver for `query_fasta`.

        Keeps the `run` / `progress` objects and the search settings on the instance,
        asks `utils.is_program_exists` about `makeblastdb` and `blastp`, and fills in
        default output locations.

        NOTE(review): `utils.is_program_exists` presumably raises when a program is
        missing (other callers pass `dont_raise=True` to avoid that) -- confirm.
        """
        self.run, self.progress = run, progress

        self.num_threads = num_threads
        self.evalue = 1e-05
        self.overwrite_output_destinations = overwrite_output_destinations

        # both executables are checked up front, in this order
        for required_program in ('makeblastdb', 'blastp'):
            utils.is_program_exists(required_program)

        self.tmp_dir = tempfile.gettempdir()

        self.query_fasta = query_fasta
        self.target_db_path = 'blast-target'
        self.search_output_path = 'blast-search-results.txt'

        # a default log file is only used when the run object does not have one yet
        if not self.run.log_file_path:
            self.run.log_file_path = 'blast-log-file.txt'

        # None means there is nothing special to do. Otherwise the query FASTA is assumed
        # to be uniqued, and names_dict maps each id in the FASTA file to the ids of the
        # sequences that were identical to it.
        self.names_dict = None
示例#2
0
    def __init__(self, run=run, progress=progress):
        """Prepare a driver for the `DAS_Tool` program.

        Keeps the `run` and `progress` objects on the instance, records the program
        name, and passes that name to `utils.is_program_exists`.
        """
        self.run, self.progress = run, progress

        self.program_name = 'DAS_Tool'

        utils.is_program_exists(self.program_name)
示例#3
0
    def check_version(self, use_version=None):
        """Decide which eggnog-mapper version to work with and select its parser.

        Parameters
        ==========
        use_version : str, optional
            When given, this version is used as is. Otherwise the version is parsed from
            the first line of the output of `<executable> --version` (the text after the
            first '-').

        Raises
        ======
        ConfigError
            If the version is not a key of `self.available_parsers`.

        On success `self.installed_version` and `self.parser` are set.
        """

        utils.is_program_exists(self.executable)

        if use_version:
            version_found = use_version
        else:
            output, ret_code = utils.get_command_output_from_shell('%s --version' % self.executable)
            first_line = output.split('\n')[0]
            version_found = first_line.split('-')[1]

        if version_found in self.available_parsers:
            self.installed_version = version_found
            self.parser = self.available_parsers[version_found]
            return

        known_versions = ', '.join(list(self.available_parsers.keys()))

        if use_version:
            raise ConfigError("Anvi'o does not know about the version you requested. Here are the ones available: %s" % known_versions)

        raise ConfigError("Bad news :( This version of anvi'o does not have a parser for the eggnog-mapper installed\
                                    on your system. This is the version you have on your system (if this looks totally alien\
                                    to you it may indicate another problem, in which case consider writing to anvi'o developers):\
                                    %s. For your reference, these are the versions anvi'o knows what to do with: %s" % (version_found, known_versions))
示例#4
0
    def __init__(self, query_fasta, target_fasta=None, run=run, progress=progress, num_threads=1, overwrite_output_destinations=False):
        """Set up a DIAMOND search driver.

        Parameters
        ==========
        query_fasta : str
            Path to the query FASTA file.
        target_fasta : str, optional
            Path to the target FASTA file. When it is not given, the query FASTA is also
            used as the target.
        run, progress
            Terminal reporting objects kept on the instance.
        num_threads : int
            Stored on the instance as `num_threads`.
        overwrite_output_destinations : bool
            Stored on the instance as `overwrite_output_destinations`.
        """
        self.run = run
        self.progress = progress

        self.num_threads = num_threads
        self.overwrite_output_destinations = overwrite_output_destinations

        utils.is_program_exists('diamond')

        self.tmp_dir = tempfile.gettempdir()
        self.evalue = 1e-05
        self.max_target_seqs = 100000

        self.query_fasta = query_fasta
        self.target_fasta = target_fasta

        if not self.target_fasta:
            self.target_fasta = self.query_fasta
        elif getattr(self, 'target_db_path', None):
            # `target_db_path` is not assigned anywhere in this constructor, so a plain
            # attribute access would raise AttributeError unless the class (or a subclass)
            # defines it. getattr keeps the original behaviour when it is defined and
            # simply keeps the user-provided target FASTA when it is not.
            self.target_fasta = self.target_db_path

        self.search_output_path = 'diamond-search-results'
        self.tabular_output_path = 'diamond-search-results.txt'

        # a default log file is only used when the run object does not have one yet
        if not self.run.log_file_path:
            self.run.log_file_path = 'diamond-log-file.txt'

        self.sensitive = False

        # if names_dict is None, all fine. if not, the query_fasta is assumed to be uniqued, and names_dict is
        # the dictionary that connects the ids in the fasta file, to ids that were identical to it.
        self.names_dict = None
示例#5
0
    def __init__(self, query_fasta, run=run, progress=progress, num_threads=1, overwrite_output_destinations=False):
        """Set up a DIAMOND search driver for `query_fasta`.

        Keeps the `run` / `progress` objects and the search settings on the instance,
        passes 'diamond' to `utils.is_program_exists`, and fills in default output
        locations.
        """
        self.run, self.progress = run, progress

        self.num_threads = num_threads
        self.overwrite_output_destinations = overwrite_output_destinations

        utils.is_program_exists('diamond')

        # search defaults
        self.tmp_dir = tempfile.gettempdir()
        self.evalue = 1e-05
        self.max_target_seqs = 100000

        # input and default output locations
        self.query_fasta = query_fasta
        self.target_db_path = 'diamond-target'
        self.search_output_path = 'diamond-search-results'
        self.tabular_output_path = 'diamond-search-results.txt'

        # a default log file is only used when the run object does not have one yet
        if not self.run.log_file_path:
            self.run.log_file_path = 'diamond-log-file.txt'

        self.sensitive = False

        # None means there is nothing special to do. Otherwise the query FASTA is assumed
        # to be uniqued, and names_dict maps each id in the FASTA file to the ids of the
        # sequences that were identical to it.
        self.names_dict = None
示例#6
0
    def check_version(self):
        """Decide which eggnog-mapper version to work with and select its parser.

        When `self.annotation` is set, `self.use_version` is taken as the version.
        Otherwise the version is parsed from the first line of the output of
        `<executable> --version` (the text after the first '-').

        Raises
        ======
        ConfigError
            If `annotation` and `use_version` are not provided together (or not omitted
            together), or if the version is not a key of `self.available_parsers`.

        On success `self.version_to_use` and `self.parser` are set.
        """

        if self.annotation and not self.use_version:
            raise ConfigError("You must provide a version number to use if you have your own annotations.")

        if not self.annotation and self.use_version:
            raise ConfigError("If you are not providing any annotations, you must let anvi'o figure out what\
                               version of emapper to use.")

        if not self.annotation:
            utils.is_program_exists(self.executable)
            output, ret_code = utils.get_command_output_from_shell('%s --version' % self.executable)
            first_line = output.split('\n')[0]
            version_to_use = first_line.split('-')[1]
        else:
            version_to_use = self.use_version

        if version_to_use in self.available_parsers:
            self.version_to_use = version_to_use
            self.parser = self.available_parsers[version_to_use]
            return

        known_versions = ', '.join(list(self.available_parsers.keys()))

        if self.annotation:
            raise ConfigError("Anvi'o does not know about the version you requested. Here are the ones available: %s" % known_versions)

        raise ConfigError("Bad news :( This version of anvi'o does not have a parser for the eggnog-mapper installed\
                                    on your system. This is the version you have on your system (if this looks totally alien\
                                    to you it may indicate another problem, in which case consider writing to anvi'o developers):\
                                    %s. For your reference, these are the versions anvi'o knows what to do with: %s" % (version_to_use, known_versions))
示例#7
0
    def check_programs(self):
        """Check for `self.program_name` and record which version of it is installed.

        The version is read from the output of `<program_name> -h`: second line, second
        whitespace-separated token, the part before ':', lower-cased. If that cannot be
        parsed the version is recorded as 'Unknown' and a warning is shown. A second
        warning is shown when the version is not in `self.tested_versions`.

        The result is stored in `self.installed_version`.
        """
        utils.is_program_exists(self.program_name)

        output, ret_code = utils.get_command_output_from_shell('%s -h' % self.program_name)

        try:
            version_found = output.split(b'\n')[1].split()[1].split(b':')[0].lower().decode("utf-8")
        except Exception:
            # best-effort parse: any unexpected output shape means we simply do not know
            # the version. `Exception` (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            version_found = 'Unknown'
            # the program name is now actually interpolated into the message
            self.run.warning("Anvi'o failed to learn the version of %s installed on this system :/" % self.program_name)
        else:
            self.run.info('%s version found' % self.program_name,
                          version_found,
                          mc="green",
                          nl_after=1)

        if version_found not in self.tested_versions:
            self.run.warning("The version of %s installed on your system ('%s') is not one of those that we tested its anvi'o driver\
                              with. Anvi'o will continue to try to run everything as if this didn't happen. If you see this warning\
                              but everything works fine, let us know so we can include this version number into the list of 'tested'\
                              version numbers. If you see an unexpexted error, please consider installing one of these versions\
                              of tRNAScan-SE (and again please let us know anyway so we can address it for later): '%s'" % \
                                           (self.program_name, version_found, ', '.join(list(self.tested_versions))))

        self.installed_version = version_found
示例#8
0
    def check_version(self):
        """Decide which eggnog-mapper version to work with and select its parser.

        When `self.annotation` is set, `self.use_version` is taken as the version.
        Otherwise the version is parsed from the first line of the output of
        `<executable> --version` (the text after the first '-').

        Raises
        ======
        ConfigError
            If `annotation` and `use_version` are not provided together (or not omitted
            together), or if the version is not a key of `self.available_parsers`.

        On success `self.version_to_use` and `self.parser` are set.
        """

        if self.annotation and not self.use_version:
            raise ConfigError("You must provide a version number to use if you have your own annotations.")

        if not self.annotation and self.use_version:
            raise ConfigError("If you are not providing any annotations, you must let anvi'o figure out what "
                              "version of emapper to use.")

        if not self.annotation:
            utils.is_program_exists(self.executable)
            output, ret_code = utils.get_command_output_from_shell('%s --version' % self.executable)
            first_line = output.split('\n')[0]
            version_to_use = first_line.split('-')[1]
        else:
            version_to_use = self.use_version

        if version_to_use in self.available_parsers:
            self.version_to_use = version_to_use
            self.parser = self.available_parsers[version_to_use]
            return

        known_versions = ', '.join(list(self.available_parsers.keys()))

        if self.annotation:
            raise ConfigError("Anvi'o does not know about the version you requested. Here are the ones available: %s" % known_versions)

        raise ConfigError("Bad news :( This version of anvi'o does not have a parser for the eggnog-mapper installed "
                          "on your system. This is the version you have on your system (if this looks totally alien "
                          "to you it may indicate another problem, in which case consider writing to anvi'o developers): "
                          "%s. For your reference, these are the versions anvi'o knows what to do with: %s" % (version_to_use, known_versions))
示例#9
0
    def format_protein_db(self, input_file_path, output_file_path):
        """Decompress the protein sequences and build DIAMOND and BLAST search databases.

        Parameters
        ==========
        input_file_path : str
            Path to the gzip-compressed protein sequences.
        output_file_path : str
            Not used by this method; the databases are written under `self.COG_data_dir`
            (`DB_DIAMOND` and `DB_BLAST`).

        Notes
        =====
        - A database is only built for a tool that `utils.is_program_exists` reports as
          available; otherwise a warning is shown.
        - The temporary FASTA file is removed even when decompression or database
          building fails.
        """
        progress.new('Formatting raw files')
        progress.update('Decompressing protein sequences')

        temp_fasta_path = filesnpaths.get_temp_file_path()

        try:
            try:
                # stream the decompressed bytes rather than loading the whole file in memory
                with open(temp_fasta_path, 'wb') as f_out, gzip.open(input_file_path, 'rb') as f_in:
                    shutil.copyfileobj(f_in, f_out)
            finally:
                # close the progress display whether or not decompression worked
                progress.end()

            if utils.is_program_exists('diamond', dont_raise=True):
                output_dir = J(self.COG_data_dir, 'DB_DIAMOND')
                # always start from an empty output directory
                if os.path.exists(output_dir):
                    shutil.rmtree(output_dir)

                os.mkdir(output_dir)

                output_db_path = J(output_dir, 'COG')
                log_file_path = J(output_dir, 'log.txt')

                self.run.info('Diamond log', log_file_path)

                diamond = Diamond(temp_fasta_path)
                diamond.num_threads = self.num_threads
                diamond.run.log_file_path = log_file_path
                diamond.makedb(output_db_path)
            else:
                self.run.warning("Diamond does not seem to be installed on this system, so anvi'o is not going to\
                              generate a search database for it. Remember this when/if things go South.")

            if utils.is_program_exists('makeblastdb', dont_raise=True) and utils.is_program_exists('blastp', dont_raise=True):
                output_dir = J(self.COG_data_dir, 'DB_BLAST')
                # always start from an empty output directory
                if os.path.exists(output_dir):
                    shutil.rmtree(output_dir)

                os.mkdir(output_dir)

                output_db_path = J(output_dir, 'COG')
                log_file_path = J(output_dir, 'log.txt')

                self.run.info('BLAST log', log_file_path)

                blast = BLAST(temp_fasta_path)
                blast.run.log_file_path = log_file_path
                blast.num_threads = self.num_threads
                blast.makedb(os.path.join(output_db_path, 'COG.fa'))
            else:
                self.run.warning("BLAST tools do not seem to be installed on this system, so anvi'o is not going to\
                              generate a search database for them to be used. Keep this in mind for later.")
        finally:
            # never leave the decompressed copy behind, even on failure
            if os.path.exists(temp_fasta_path):
                os.remove(temp_fasta_path)
示例#10
0
    def format_protein_db(self, input_file_path, output_file_path):
        """Decompress the protein sequences and build DIAMOND and BLAST search databases.

        Parameters
        ==========
        input_file_path : str
            Path to the gzip-compressed protein sequences.
        output_file_path : str
            Not used by this method; the databases are written under `self.COG_data_dir`
            (`DB_DIAMOND` and `DB_BLAST`).

        Raises
        ======
        ConfigError
            If the input file cannot be decompressed.

        Notes
        =====
        - A database is only built for a tool that `utils.is_program_exists` reports as
          available; otherwise a warning is shown.
        - The temporary FASTA file is removed even when decompression or database
          building fails.
        """
        progress.new('Formatting raw files')
        progress.update('Decompressing protein sequences')

        temp_fasta_path = filesnpaths.get_temp_file_path()

        try:
            try:
                # stream the decompressed bytes rather than loading the whole file in memory
                with open(temp_fasta_path, 'wb') as f_out, gzip.open(input_file_path, 'rb') as f_in:
                    shutil.copyfileobj(f_in, f_out)
            except Exception as e:
                progress.end()
                raise ConfigError(f"Something went wrong while decompressing the downloaded file :/ It is likely that "
                                  f"the download failed and only part of the file was downloaded. If you would like to "
                                  f"try again, please run the setup command with the flag `--reset`. Here is what the "
                                  f"downstream library said: '{e}'.")

            progress.end()

            if utils.is_program_exists('diamond', dont_raise=True):
                output_dir = J(self.COG_data_dir, 'DB_DIAMOND')
                # always start from an empty output directory
                if os.path.exists(output_dir):
                    shutil.rmtree(output_dir)

                os.mkdir(output_dir)

                output_db_path = J(output_dir, 'COG')
                log_file_path = J(output_dir, 'log.txt')

                self.run.info('Diamond log', log_file_path)

                diamond = Diamond(temp_fasta_path)
                diamond.num_threads = self.num_threads
                diamond.run.log_file_path = log_file_path
                diamond.makedb(output_db_path)
            else:
                self.run.warning("DIAMOND does not seem to be installed on this system, so anvi'o is not going to "
                                 "generate a search database for it. Remember this when/if things go South.")

            if utils.is_program_exists('makeblastdb', dont_raise=True) and utils.is_program_exists('blastp', dont_raise=True):
                output_dir = J(self.COG_data_dir, 'DB_BLAST')
                # always start from an empty output directory
                if os.path.exists(output_dir):
                    shutil.rmtree(output_dir)

                os.mkdir(output_dir)

                output_db_path = J(output_dir, 'COG')
                log_file_path = J(output_dir, 'log.txt')

                self.run.info('BLAST log', log_file_path)

                blast = BLAST(temp_fasta_path)
                blast.run.log_file_path = log_file_path
                blast.num_threads = self.num_threads
                blast.makedb(os.path.join(output_db_path, 'COG.fa'))
            else:
                self.run.warning("BLAST tools do not seem to be installed on this system, so anvi'o is not going to "
                                 "generate a search database for them to be used. Keep this in mind for later.")
        finally:
            # never leave the decompressed copy behind, even on failure
            if os.path.exists(temp_fasta_path):
                os.remove(temp_fasta_path)
示例#11
0
    def __init__(self, progress=progress, run=run, program_name='muscle'):
        """Prepare a driver around an external program (`muscle` by default).

        Keeps the `progress` and `run` objects on the instance, records the program
        name, and passes that name to `utils.is_program_exists`.
        """
        self.progress, self.run = progress, run

        self.program_name = program_name

        utils.is_program_exists(self.program_name)
示例#12
0
    def sanity_check_for_adding_genes(self):
        """Validate the genes requested for addition to the structure database.

        Checks, in order:
        - every gene in `self.genes_of_interest` is in `self.genes_in_contigs_database`,
        - none of them is already in `self.structure_db.genes_queried`,
        - warns when more than 20 genes were requested,
        - unless `self.skip_DSSP` is set, finds a DSSP executable ('mkdssp' preferred,
          then 'dssp') and stores its name in `self.DSSP_executable`.

        Raises
        ======
        ConfigError
            If a gene is unknown, already present, or no DSSP executable is found.
        """

        # genes the contigs database does not know about
        unknown_ids = []
        for gene_id in self.genes_of_interest:
            if gene_id not in self.genes_in_contigs_database:
                unknown_ids.append(gene_id)

        if unknown_ids:
            subject = "This gene caller id you" if len(unknown_ids) == 1 else "These gene caller ids you"
            raise ConfigError(subject + " want to add to the structure database\
                               are not known to the contigs database: {}. You have only 2 lives\
                               left. 2 more mistakes, and anvi'o will automatically uninstall\
                               itself. Yes, seriously :(".format(", ".join([str(x) for x in unknown_ids])))

        # genes that are already in the structure database
        already_present_ids = []
        for gene_id in self.genes_of_interest:
            if gene_id in self.structure_db.genes_queried:
                already_present_ids.append(gene_id)

        if already_present_ids:
            subject = "This gene caller id you" if len(already_present_ids) == 1 else "These gene caller ids you"
            raise ConfigError(subject + " want to add to the structure database\
                               is already in the structure database: {}. If you want to re-do the\
                               modelling, then first remove it with --genes-to-remove or\
                               --genes-to-remove-file (you can do it in the same\
                               anvi-update-genes-in-structure-database command).".format(", ".join([str(x) for x in already_present_ids])))

        # more than 20 genes only triggers a warning
        num_genes = len(self.genes_of_interest)
        if num_genes > 20:
            self.run.warning("Modelling protein structures is no joke. The number of genes you want\
                              to append to the structure database is {}, which is a lot (of time!).\
                              CTRL + C to cancel.".format(num_genes))

        if self.skip_DSSP:
            return

        # mkdssp is newer and preferred
        if utils.is_program_exists("mkdssp", dont_raise=True):
            self.DSSP_executable = "mkdssp"

        if not self.DSSP_executable:
            if not utils.is_program_exists("dssp", dont_raise=True):
                raise ConfigError("An anvi'o function needs 'mkdssp' or 'dssp' to be installed on your system, but\
                                       neither seem to appear in your path :/ If you are certain you have either on your\
                                       system (for instance you can run either by typing 'mkdssp' or 'dssp' in your terminal\
                                       window), you may want to send a detailed bug report. If you want to install DSSP,\
                                       check out http://merenlab.org/2016/06/18/installing-third-party-software/#dssp.\
                                       If you want to skip secondary structure and solvent accessibility annotation,\
                                       provide the flag --skip-DSSP.")

            self.DSSP_executable = "dssp"

        found_message = "Anvi'o found the DSSP executable `%s`, and will use it." % self.DSSP_executable
        self.run.info_single(found_message, nl_before=1, nl_after=1)
示例#13
0
    def sanity_check(self):
        """Validate the requested genes and settings before modelling protein structures.

        Checks, in order:
        - every gene in `self.genes_of_interest` is in `self.genes_in_contigs_database`,
        - warns when more than 20 genes were requested,
        - warns when `self.percent_identical_cutoff` is below 25,
        - when `self.skip_DSSP` is set, warns about it; otherwise finds a DSSP executable
          ('mkdssp' preferred, then 'dssp') and stores its name in `self.DSSP_executable`.

        Raises
        ======
        ConfigError
            If a gene is unknown or no DSSP executable is found.
        """

        # genes the contigs database does not know about
        unknown_ids = []
        for gene_id in self.genes_of_interest:
            if gene_id not in self.genes_in_contigs_database:
                unknown_ids.append(gene_id)

        if unknown_ids:
            subject = "This gene caller id you provided is" if len(unknown_ids) == 1 else "These gene caller ids you provided are"
            raise ConfigError(subject + " not known to this contigs database: {}.\
                               You have only 2 lives left. 2 more mistakes, and anvi'o will automatically uninstall \
                               itself. Yes, seriously :(".format(", ".join([str(x) for x in unknown_ids])))

        # more than 20 genes only triggers a warning
        num_genes = len(self.genes_of_interest)
        if num_genes > 20:
            self.run.warning("Modelling protein structures is no joke. The number of genes you want protein structures for is \
                              {}, which is a lot (of time!). If its taking too long, consider using the --very-fast flag. \
                              CTRL + C to cancel.".format(num_genes))

        # below 25 the user is told to watch model quality
        if self.percent_identical_cutoff < 25:
            self.run.warning("You selected a percent identical cutoff of {}%. Below 25%, you should pay close attention \
                              to the quality of the proteins...".format(self.percent_identical_cutoff))

        if self.skip_DSSP:
            self.run.warning("You requested to skip amino acid residue annotation with DSSP. A bold move only an expert could justify... \
                              Anvi'o's respect for you increases slightly.")
            return

        # mkdssp is newer and preferred
        if utils.is_program_exists("mkdssp", dont_raise=True):
            self.DSSP_executable = "mkdssp"

        if not self.DSSP_executable:
            if not utils.is_program_exists("dssp", dont_raise=True):
                raise ConfigError("An anvi'o function needs 'mkdssp' or 'dssp' to be installed on your system, but\
                                       neither seem to appear in your path :/ If you are certain you have either on your\
                                       system (for instance you can run either by typing 'mkdssp' or 'dssp' in your terminal\
                                       window), you may want to send a detailed bug report. If you want to install DSSP,\
                                       check out http://merenlab.org/2016/06/18/installing-third-party-software/#dssp.\
                                       If you want to skip secondary structure and solvent accessibility annotation,\
                                       provide the flag --skip-DSSP.")

            self.DSSP_executable = "dssp"

        found_message = "Anvi'o found the DSSP executable `%s`, and will use it." % self.DSSP_executable
        self.run.info_single(found_message, nl_before=1, nl_after=1)
示例#14
0
    def __init__(self, progress=progress, run=run, program_name='muscle'):
        """Prepare a driver for muscle alignments.

        Keeps the `progress` and `run` objects on the instance, records the program
        name, passes that name to `utils.is_program_exists`, and stores the citation
        and web address of the program.
        """
        self.progress, self.run = progress, run

        self.program_name = program_name

        utils.is_program_exists(self.program_name)

        # reference information for the program
        self.citation, self.web = "Edgar, doi:10.1093/nar/gkh340", "http://www.drive5.com/muscle"
示例#15
0
    def __init__(self, progress=progress, run=run, program_name='muscle'):
        """Prepare a driver for muscle alignments.

        Keeps the `progress` and `run` objects on the instance, records the program
        name, passes that name to `utils.is_program_exists`, and stores the citation
        and web address of the program.
        """
        self.progress, self.run = progress, run

        self.program_name = program_name

        utils.is_program_exists(self.program_name)

        # reference information for the program
        self.citation, self.web = "Edgar, doi:10.1093/nar/gkh340", "http://www.drive5.com/muscle"
示例#16
0
    def __init__(self, progress=progress, run=run, program_name='famsa'):
        """Prepare a driver for alignments with FAMSA.

        Keeps the `progress` and `run` objects on the instance, records the program
        name, passes that name to `utils.is_program_exists`, and stores the citation
        and web address of the program.
        """
        self.progress, self.run = progress, run

        self.program_name = program_name

        utils.is_program_exists(self.program_name)

        # reference information for the program
        self.citation, self.web = "Deorowicz et al., doi:10.1038/srep33964", "https://github.com/refresh-bio/FAMSA"
示例#17
0
文件: famsa.py 项目: yinx843/anvio
    def __init__(self, progress=progress, run=run, program_name='famsa'):
        """Prepare a driver for alignments with FAMSA.

        Keeps the `progress` and `run` objects on the instance, records the program
        name, passes that name to `utils.is_program_exists`, and stores the citation
        and web address of the program.
        """
        self.progress, self.run = progress, run

        self.program_name = program_name

        utils.is_program_exists(self.program_name)

        # reference information for the program
        self.citation, self.web = "Deorowicz et al., doi:10.1038/srep33964", "https://github.com/refresh-bio/FAMSA"
示例#18
0
    def format_protein_db(self, input_file_path, output_file_path):
        """Decompress the protein sequences and build DIAMOND and BLAST search databases.

        Parameters
        ==========
        input_file_path : str
            Path to the gzip-compressed protein sequences.
        output_file_path : str
            Not used by this method; the databases are written under `self.COG_data_dir`
            (`DB_DIAMOND` and `DB_BLAST`).

        Notes
        =====
        - A database is only built for a tool that `utils.is_program_exists` reports as
          available; otherwise a warning is shown.
        - The temporary FASTA file is removed even when decompression or database
          building fails.
        """
        progress.new('Formatting raw files')
        progress.update('Decompressing protein sequences')

        temp_fasta_path = filesnpaths.get_temp_file_path()

        try:
            try:
                # stream the decompressed bytes rather than loading the whole file in memory
                with open(temp_fasta_path, 'wb') as f_out, gzip.open(input_file_path, 'rb') as f_in:
                    shutil.copyfileobj(f_in, f_out)
            finally:
                # close the progress display whether or not decompression worked
                progress.end()

            if utils.is_program_exists('diamond', dont_raise=True):
                output_dir = J(self.COG_data_dir, 'DB_DIAMOND')
                # always start from an empty output directory
                if os.path.exists(output_dir):
                    shutil.rmtree(output_dir)

                os.mkdir(output_dir)

                output_db_path = J(output_dir, 'COG')
                log_file_path = J(output_dir, 'log.txt')

                self.run.info('Diamond log', log_file_path)

                diamond = Diamond(temp_fasta_path)
                diamond.num_threads = self.num_threads
                diamond.run.log_file_path = log_file_path
                diamond.makedb(output_db_path)
            else:
                self.run.warning("Diamond does not seem to be installed on this system, so anvi'o is not going to\
                              generate a search database for it. Remember this when/if things go South.")

            if utils.is_program_exists('makeblastdb', dont_raise=True) and utils.is_program_exists('blastp', dont_raise=True):
                output_dir = J(self.COG_data_dir, 'DB_BLAST')
                # always start from an empty output directory
                if os.path.exists(output_dir):
                    shutil.rmtree(output_dir)

                os.mkdir(output_dir)

                output_db_path = J(output_dir, 'COG')
                log_file_path = J(output_dir, 'log.txt')

                self.run.info('BLAST log', log_file_path)

                blast = BLAST(temp_fasta_path)
                blast.run.log_file_path = log_file_path
                blast.num_threads = self.num_threads
                blast.makedb(os.path.join(output_db_path, 'COG.fa'))
            else:
                self.run.warning("BLAST tools do not seem to be installed on this system, so anvi'o is not going to\
                              generate a search database for them to be used. Keep this in mind for later.")
        finally:
            # never leave the decompressed copy behind, even on failure
            if os.path.exists(temp_fasta_path):
                os.remove(temp_fasta_path)
示例#19
0
    def __init__(self, mcl_input_file_path, run=run, progress=progress, num_threads=1):
        """Prepare a driver for the `mcl` program.

        Keeps the `run` / `progress` objects, the input file path and the thread count
        on the instance, passes 'mcl' to `utils.is_program_exists`, and sets the default
        inflation value and output file names.
        """
        self.run, self.progress = run, progress

        self.mcl_input_file_path, self.num_threads = mcl_input_file_path, num_threads

        utils.is_program_exists('mcl')

        self.inflation = 2.0

        # default output locations
        self.clusters_file_path, self.log_file_path = 'mcl-clusters.txt', 'mcl-log-file.txt'
示例#20
0
    def check_version(self):
        """Detect the installed prodigal version and select the matching parser.

        The version is read from the output of `prodigal -v`: second line, second
        whitespace-separated token, the part before ':', lower-cased.

        Raises
        ======
        ConfigError
            If the detected version is not a key of `self.ok_prodigal_versions`.

        On success `self.installed_prodigal_version` and `self.parser` are set.
        """

        utils.is_program_exists('prodigal')
        output, ret_code = utils.get_command_output_from_shell('prodigal -v')

        prodigal_version_found = output.split('\n')[1].split()[1].split(':')[0].lower()

        if prodigal_version_found not in self.ok_prodigal_versions:
            # call form of `raise`: valid on both Python 2 and Python 3, whereas the
            # `raise Exc, "message"` statement form is a SyntaxError on Python 3
            raise ConfigError("The prodigal version installed on your system is not compatible\
                                with any of the versions anvi'o can work with. Please install\
                                any of the following versions: %s" % (', '.join(self.ok_prodigal_versions.keys())))

        self.installed_prodigal_version = prodigal_version_found
        self.parser = self.ok_prodigal_versions[prodigal_version_found]
示例#21
0
    def check_version(self):
        """Detect the installed prodigal version and select the matching parser.

        The version is read from the output of `prodigal -v`: second line, second
        whitespace-separated token, the part before ':', lower-cased.

        Raises
        ======
        ConfigError
            If the detected version is not a key of `self.available_parsers`.

        On success `self.installed_version` and `self.parser` are set.
        """

        utils.is_program_exists('prodigal')
        output, ret_code = utils.get_command_output_from_shell('prodigal -v')

        version_found = output.split('\n')[1].split()[1].split(':')[0].lower()

        if version_found not in self.available_parsers:
            # call form of `raise`: valid on both Python 2 and Python 3, whereas the
            # `raise Exc, "message"` statement form is a SyntaxError on Python 3
            raise ConfigError("The prodigal version installed on your system is not compatible\
                                with any of the versions anvi'o can work with. Please install\
                                any of the following versions: %s" % (', '.join(self.available_parsers.keys())))

        self.installed_version = version_found
        self.parser = self.available_parsers[version_found]
示例#22
0
    def __init__(self, query_fasta, run=run, progress=progress, num_threads=1, overwrite_output_destinations=False):
        """Set up a DIAMOND search driver for `query_fasta`.

        Keeps the `run` / `progress` objects and the search settings on the instance,
        passes 'diamond' to `utils.is_program_exists`, and fills in default output
        locations.
        """
        self.run = run
        self.progress = progress

        self.num_threads = num_threads
        self.overwrite_output_destinations = overwrite_output_destinations

        utils.is_program_exists('diamond')

        self.tmp_dir = tempfile.gettempdir()

        self.query_fasta = query_fasta
        self.log_file_path = 'diamond-log-file.txt'
        self.target_db_path = 'diamond-target'
        # was misspelled 'diamond-search-resuults'; now matches the tabular output name
        self.search_output_path = 'diamond-search-results'
        self.tabular_output_path = 'diamond-search-results.txt'
示例#23
0
    def check_workflow_program_dependencies(self, snakemake_workflow_object, dont_raise=True):
        """Report which shell programs used by the workflow rules are available.

        Parameters
        ==========
        snakemake_workflow_object: snakemake.workflow
            Source code of this object found at
            https://snakemake.readthedocs.io/en/stable/_modules/snakemake/workflow.html
        dont_raise: bool
            Passed on to `u.is_program_exists`. When False, a ConfigError is raised here
            if any program is missing.

        Notes
        =====
        - Nothing is checked when `self.this_workflow_is_inherited_by_another` is set.
        - The program of a rule is the first word of its shell command.
        - FIXME Not all of the programs identified here will _actually_ be used in the workflow.
          Finding out which commands will actually be used requires building the DAG and then
          finding the appropriate place in the Snakemake API where we can expose this information.
          See https://github.com/merenlab/anvio/issues/1316 for discussion.
        """

        if self.this_workflow_is_inherited_by_another:
            return

        shell_programs_needed = []
        for rule in snakemake_workflow_object.rules:
            if rule.shellcmd:
                shell_programs_needed.append(rule.shellcmd.strip().split()[0])

        shell_programs_missing = []
        for program in shell_programs_needed:
            if not u.is_program_exists(program, dont_raise=dont_raise):
                shell_programs_missing.append(program)

        run.warning(None, 'Shell programs for the workflow')
        run.info('Needed', ', '.join(shell_programs_needed))
        run.info('Missing', ', '.join(shell_programs_missing) or 'None', nl_after=1)

        if shell_programs_missing and not dont_raise:
            raise ConfigError("This workflow will not run without those missing programs are no longer "
                              "missing :(")
示例#24
0
文件: cogs.py 项目: fauziharoon/anvio
    def __init__(self, args=Args(), run=run, progress=progress):
        """Set up a COG search from the options found on `args`.

        Options are read from `args.__dict__`; an option that is not there is stored
        as None. The constructor also records which of 'diamond' and 'blastp' are
        available, creates a `COGsSetup` from `args`, and maps each search program name
        to the method that runs it.
        """
        self.args = args
        self.run = run
        self.progress = progress

        def option(name):
            # value of the option on `args`, or None when it was not provided
            return args.__dict__.get(name)

        self.num_threads = option('num_threads')
        self.contigs_db_path = option('contigs_db')
        self.search_with = option('search_with')
        self.temp_dir_path = option('temporary_dir_path')
        self.sensitive = option('sensitive')

        self.log_file_path = None

        installed_programs = []
        for program in ('diamond', 'blastp'):
            if utils.is_program_exists(program, dont_raise=True):
                installed_programs.append(program)
        self.available_db_search_programs = installed_programs

        self.COG_setup = COGsSetup(args)
        self.COG_data_dir = self.COG_setup.COG_data_dir
        self.available_db_search_program_targets = self.COG_setup.get_formatted_db_paths()
        self.essential_files = self.COG_setup.get_essential_file_paths()

        self.search_factory = {'diamond': self.search_with_diamond,
                               'blastp': self.search_with_blastp}

        # filled in later by the search function
        self.hits = None
示例#25
0
    def check_workflow_program_dependencies(self,
                                            snakemake_workflow_object,
                                            dont_raise=False):
        """Check that the program behind every shell rule of a workflow exists.

           The first whitespace-delimited token of each rule's `shellcmd` is taken as
           the program name. The needed and missing programs are reported through
           `run` before anything is raised, so the complete list is always shown.

           `snakemake_workflow_object` must expose `rules`, an iterable of objects with
           a `shellcmd` attribute. When `dont_raise` is True the function only reports;
           otherwise a ConfigError is raised if at least one program is missing.

           Nothing is checked when `self.slave_mode` is set.
        """

        if self.slave_mode:
            return

        # a rule whose shell command is empty or whitespace-only names no program, and
        # indexing the result of `split()` on it would raise an IndexError
        shell_programs_needed = [
            r.shellcmd.split()[0]
            for r in snakemake_workflow_object.rules
            if r.shellcmd and r.shellcmd.strip()
        ]

        # always probe quietly: letting `is_program_exists` raise on the first missing
        # program would skip the report below and hide every other missing program
        shell_programs_missing = [
            s for s in shell_programs_needed
            if not u.is_program_exists(s, dont_raise=True)
        ]

        run.warning(None, 'Shell programs for the workflow')
        run.info('Needed', ', '.join(shell_programs_needed))
        run.info('Missing',
                 ', '.join(shell_programs_missing) or 'None',
                 nl_after=1)

        if shell_programs_missing and not dont_raise:
            raise ConfigError(
                "This workflow will not run until those missing programs are no longer "
                "missing :(")
示例#26
0
    def check_programs(self):
        """Run the existence check for the main program and for the helper `self.method` needs."""
        utils.is_program_exists(self.program_name)

        # method name -> external program checked for that method; at most one applies
        for method, program in (('ANIb', 'blastn'),
                                ('ANIblastall', 'blastall'),
                                ('ANIm', 'nucmer')):
            if self.method == method:
                utils.is_program_exists(program)
                break
示例#27
0
    def __init__(self,
                 query_fasta,
                 target_fasta=None,
                 search_program='blastp',
                 run=run,
                 progress=progress,
                 num_threads=1,
                 overwrite_output_destinations=False):
        """BLAST driver.

           We generate target database from the `target_fasta`. If `target_fasta` is None,
           `query_fasta` is treated as `target_fasta`. If you don't have a FASTA file, but
           all you have are X.phr, X.pin, and X.psq files, you can set `target_fasta` to
           '/path/to/X' and it will still be OK. Calling the target FASTA creates some
           confusion, but we hope if you are reading these lines you have the potential to
           survive anything, so we are not that concerned really.

           Both `makeblastdb` and `search_program` are passed to `utils.is_program_exists`
           (without `dont_raise`) during construction.
        """
        self.run = run
        self.progress = progress

        self.num_threads = num_threads
        self.evalue = 1e-05
        self.overwrite_output_destinations = overwrite_output_destinations

        self.tmp_dir = tempfile.gettempdir()

        self.query_fasta = query_fasta
        self.target_fasta = target_fasta

        if not self.target_fasta:
            self.target_fasta = self.query_fasta
        elif getattr(self, 'target_db_path', None):
            # `target_db_path` is never assigned in this constructor, so reading it
            # directly raised AttributeError whenever a `target_fasta` was given, unless
            # the attribute happened to be defined elsewhere (e.g. at class level).
            self.target_fasta = self.target_db_path

        self.search_program = search_program
        self.search_output_path = 'blast-search-results.txt'
        self.max_target_seqs = None

        utils.is_program_exists('makeblastdb')
        utils.is_program_exists(self.search_program)

        if not self.run.log_file_path:
            self.run.log_file_path = 'blast-log-file.txt'

        # if names_dict is None, all fine. if not, the query_fasta is assumed to be uniqued, and names_dict is
        # the dictionary that connects the ids in the fasta file, to ids that were identical to it.
        self.names_dict = None
示例#28
0
    def check_programs(self):
        """Run the existence check for `self.program_name`, then for the program tied to `self.method`."""
        utils.is_program_exists(self.program_name)

        method = self.method
        if method == 'ANIb':
            helper_program = 'blastn'
        elif method == 'ANIblastall':
            helper_program = 'blastall'
        elif method == 'ANIm':
            helper_program = 'nucmer'
        else:
            # any other method has no additional program to check
            return

        utils.is_program_exists(helper_program)
示例#29
0
    def sanity_check_for_adding_genes(self):
        """Validate the genes of interest before they are added to the structure database.

        Raises ConfigError for gene caller ids that are not in the contigs database, for
        ids already in the structure database (unless `self.skip_genes_if_already_present`
        is set), and when DSSP is wanted but neither `mkdssp` nor `dssp` passes the
        existence check. Warns when more than 20 genes are requested. Sets
        `self.DSSP_executable` when one of the two programs is found.
        """

        # genes that the contigs database does not know about
        unknown_ids = [gene for gene in self.genes_of_interest if gene not in self.genes_in_contigs_database]
        if unknown_ids:
            subject = "This gene caller id you" if len(unknown_ids) == 1 else "These gene caller ids you"
            listing = ",".join(map(str, unknown_ids))
            raise ConfigError(subject + " want to add to the structure database\
                               are not known to the contigs database: {}. You have only 2 lives\
                               left. 2 more mistakes, and anvi'o will automatically uninstall\
                               itself. Yes, seriously :(".format(listing))

        # genes that are already present in the structure database
        already_present_ids = [gene for gene in self.genes_of_interest if gene in self.structure_db.genes_queried]
        if already_present_ids and not self.skip_genes_if_already_present:
            subject = "This gene caller id you" if len(already_present_ids) == 1 else "These gene caller ids you"
            listing = ",".join(map(str, already_present_ids))
            raise ConfigError(subject + " want to add to the structure database\
                               is already in the structure database: {}. If you want to re-do the\
                               modelling, then first remove it with --genes-to-remove or\
                               --genes-to-remove-file (you can do it in the same\
                               anvi-update-genes-in-structure-database command).".format(listing))

        # more than 20 genes only earns a warning
        num_genes = len(self.genes_of_interest)
        if num_genes > 20:
            self.run.warning("Modelling protein structures is no joke. The number of genes you want\
                              to append to the structure database is {}, which is a lot (of time!).\
                              CTRL + C to cancel.".format(num_genes))

        if self.skip_DSSP:
            return

        # mkdssp is newer and preferred
        if utils.is_program_exists("mkdssp", dont_raise=True):
            self.DSSP_executable = "mkdssp"

        if not self.DSSP_executable:
            if not utils.is_program_exists("dssp", dont_raise=True):
                raise ConfigError("An anvi'o function needs 'mkdssp' or 'dssp' to be installed on your system, but\
                                       neither seem to appear in your path :/ If you are certain you have either on your\
                                       system (for instance you can run either by typing 'mkdssp' or 'dssp' in your terminal\
                                       window), you may want to send a detailed bug report. If you want to install DSSP,\
                                       check out http://merenlab.org/2016/06/18/installing-third-party-software/#dssp.\
                                       If you want to skip secondary structure and solvent accessibility annotation,\
                                       provide the flag --skip-DSSP.")
            self.DSSP_executable = "dssp"

        self.run.info_single("Anvi'o found the DSSP executable `%s`, and will use it." % self.DSSP_executable,
                             nl_before=1, nl_after=1)
示例#30
0
    def __init__(self,
                 mcl_input_file_path,
                 run=run,
                 progress=progress,
                 num_threads=1):
        """Store the input path and the default settings for an MCL run.

        `utils.is_program_exists('mcl')` is called during construction, without
        `dont_raise`.
        """
        self.run, self.progress = run, progress
        self.mcl_input_file_path, self.num_threads = mcl_input_file_path, num_threads

        utils.is_program_exists('mcl')

        # defaults
        self.inflation = 2.0
        self.clusters_file_path, self.log_file_path = 'mcl-clusters.txt', 'mcl-log-file.txt'
示例#31
0
    def sanity_check(self):
        """Validate the requested genes and settings before modelling starts.

        Raises ConfigError for gene caller ids that are not in the contigs database, and
        when DSSP is wanted but neither `mkdssp` nor `dssp` passes the existence check.
        Warns when more than 20 genes are requested, when the percent identical cutoff
        is below 25, and when DSSP is skipped. Sets `self.DSSP_executable` when one of
        the two programs is found.
        """

        # genes that the contigs database does not know about
        unknown_ids = [gene for gene in self.genes_of_interest if gene not in self.genes_in_contigs_database]
        if unknown_ids:
            subject = "This gene caller id you provided is" if len(unknown_ids) == 1 else "These gene caller ids you provided are"
            listing = ", ".join(map(str, unknown_ids))
            raise ConfigError(subject + " not known to this contigs database: {}.\
                               You have only 2 lives left. 2 more mistakes, and anvi'o will automatically uninstall \
                               itself. Yes, seriously :(".format(listing))

        # more than 20 genes only earns a warning
        num_genes = len(self.genes_of_interest)
        if num_genes > 20:
            self.run.warning("Modelling protein structures is no joke. The number of genes you want protein structures for is \
                              {}, which is a lot (of time!). If its taking too long, consider using the --very-fast flag. \
                              CTRL + C to cancel.".format(num_genes))

        # below 25 percent identity, the accuracy of the models deserves attention
        cutoff = self.percent_identical_cutoff
        if cutoff < 25:
            self.run.warning("You selected a percent identical cutoff of {}%. Below 25%, you should pay close attention \
                              to the quality of the proteins...".format(cutoff))

        if self.skip_DSSP:
            self.run.warning("You requested to skip amino acid residue annotation with DSSP. A bold move only an expert could justify... \
                              Anvi'o's respect for you increases slightly.")
            return

        # mkdssp is newer and preferred
        if utils.is_program_exists("mkdssp", dont_raise=True):
            self.DSSP_executable = "mkdssp"

        if not self.DSSP_executable:
            if not utils.is_program_exists("dssp", dont_raise=True):
                raise ConfigError("An anvi'o function needs 'mkdssp' or 'dssp' to be installed on your system, but\
                                       neither seem to appear in your path :/ If you are certain you have either on your\
                                       system (for instance you can run either by typing 'mkdssp' or 'dssp' in your terminal\
                                       window), you may want to send a detailed bug report. If you want to install DSSP,\
                                       check out http://merenlab.org/2016/06/18/installing-third-party-software/#dssp.\
                                       If you want to skip secondary structure and solvent accessibility annotation,\
                                       provide the flag --skip-DSSP.")
            self.DSSP_executable = "dssp"

        self.run.info_single("Anvi'o found the DSSP executable `%s`, and will use it." % self.DSSP_executable,
                             nl_before=1, nl_after=1)
示例#32
0
    def __init__(self,
                 query_fasta,
                 run=run,
                 progress=progress,
                 num_threads=1,
                 overwrite_output_destinations=False):
        """Store the query FASTA path and the default file names for a DIAMOND run.

        `utils.is_program_exists('diamond')` is called during construction, without
        `dont_raise`. All output locations set here are plain defaults stored on the
        instance.
        """
        self.run = run
        self.progress = progress

        self.num_threads = num_threads
        self.overwrite_output_destinations = overwrite_output_destinations

        utils.is_program_exists('diamond')

        self.tmp_dir = tempfile.gettempdir()

        self.query_fasta = query_fasta
        self.log_file_path = 'diamond-log-file.txt'
        self.target_db_path = 'diamond-target'
        # the default used to be misspelled as 'diamond-search-resuults'
        self.search_output_path = 'diamond-search-results'
        self.tabular_output_path = 'diamond-search-results.txt'
示例#33
0
    def __init__(self, query_fasta, target_fasta=None, search_program='blastp', run=run, progress=progress, num_threads=1, overwrite_output_destinations=False):
        """BLAST driver.

           We generate target database from the `target_fasta`. If `target_fasta` is None,
           `query_fasta` is treated as `target_fasta`. If you don't have a FASTA file, but
           all you have are X.phr, X.pin, and X.psq files, you can set `target_fasta` to
           '/path/to/X' and it will still be OK. Calling the target FASTA creates some
           confusion, but we hope if you are reading these lines you have the potential to
           survive anything, so we are not that concerned really.

           Both `makeblastdb` and `search_program` are passed to `utils.is_program_exists`
           (without `dont_raise`) during construction.
        """
        self.run = run
        self.progress = progress

        self.num_threads = num_threads
        self.evalue = 1e-05
        self.overwrite_output_destinations = overwrite_output_destinations

        self.tmp_dir = tempfile.gettempdir()

        self.query_fasta = query_fasta
        self.target_fasta = target_fasta

        if not self.target_fasta:
            self.target_fasta = self.query_fasta
        elif getattr(self, 'target_db_path', None):
            # `target_db_path` is never assigned in this constructor, so reading it
            # directly raised AttributeError whenever a `target_fasta` was given, unless
            # the attribute happened to be defined elsewhere (e.g. at class level).
            self.target_fasta = self.target_db_path

        self.search_program = search_program
        self.search_output_path = 'blast-search-results.txt'
        self.max_target_seqs = None

        utils.is_program_exists('makeblastdb')
        utils.is_program_exists(self.search_program)

        if not self.run.log_file_path:
            self.run.log_file_path = 'blast-log-file.txt'

        # if names_dict is None, all fine. if not, the query_fasta is assumed to be uniqued, and names_dict is
        # the dictionary that connects the ids in the fasta file, to ids that were identical to it.
        self.names_dict = None
示例#34
0
    def check_programs(self):
        """Run the existence check for the selected search program, then for `mcl`."""
        # `blastp` when NCBI BLAST is requested, `diamond` otherwise
        search_program = 'blastp' if self.use_ncbi_blast else 'diamond'

        for program in (search_program, 'mcl'):
            utils.is_program_exists(program)
示例#35
0
    def dry_run(self, workflow_graph_output_file_path_prefix='workflow'):
        """Not your regular dry run.

           The purpose of this function is to make sure there is a way to check for
           workflow program dependencies before the workflow is actually run. this way,
           if there is a `check_workflow_program_dependencies` call at the end of the
           snake file `get_workflow_snake_file_path(self.name)`, it can be called with
           a compiled snakemake `workflow` instance.

           When `self.save_workflow_graph` is set, `--dag` is added to the snakemake call,
           the graph is cut out of the log and written to
           `workflow_graph_output_file_path_prefix + '.dot'`, and, if `dot` is available,
           a PDF is rendered next to it. A ConfigError is raised if the log contains no
           line starting with 'digraph'.

           Nothing is done when `self.this_workflow_is_inherited_by_another` is set."""

        if self.this_workflow_is_inherited_by_another:
            return

        self.progress.new('Bleep bloop')
        self.progress.update('Quick dry run for an initial sanity check ...')
        args = ['snakemake', '--snakefile', get_workflow_snake_file_path(self.name),
                '--configfile', self.config_file, '--dryrun', '--quiet']

        if self.save_workflow_graph:
            args.append('--dag')

        log_file_path = filesnpaths.get_temp_file_path()
        u.run_command(args, log_file_path)
        self.progress.end()

        # here we're getting the graph info from the log file like a dirty hacker
        # we are (it still may be better to do it elsewhere more appropriate .. so
        # we can look more decent or whatever):
        if self.save_workflow_graph:
            dot_file_path = workflow_graph_output_file_path_prefix + '.dot'
            pdf_file_path = workflow_graph_output_file_path_prefix + '.pdf'

            # plain text mode: the 'U' flag of the former 'rU' mode was removed in
            # Python 3.11, where passing it raises ValueError
            with open(log_file_path) as log_file:
                lines = log_file.readlines()

            line_of_interest = next((line_no for line_no, line in enumerate(lines) if line.startswith('digraph')), None)
            if line_of_interest is None:
                # the log file is kept on purpose, the message points the user to it
                raise ConfigError("Oh no. Anvi'o was trying to generate a DAG output for you, but something must have "
                                  "gone wrong in a step prior. Something tells anvi'o that if you take a look at the "
                                  "log file here, you may be able to figure it out: '%s'. Sorry!" % log_file_path)

            with open(dot_file_path, 'w') as dot_file:
                dot_file.write(''.join(lines[line_of_interest:]))

            self.run.info('Workflow DOT file', dot_file_path)

            if u.is_program_exists('dot', dont_raise=True):
                dot_log_file = filesnpaths.get_temp_file_path()
                u.run_command(['dot', '-Tpdf', dot_file_path, '-o', pdf_file_path], dot_log_file)
                os.remove(dot_log_file)
                self.run.info('Workflow PDF file', pdf_file_path)
            else:
                self.run.warning("Well, anvi'o was going to try to save a nice PDF file for your workflow "
                                 "graph, but clearly you don't have `dot` installed on your system. That's OK. You "
                                 "have your dot file now, and you can Google 'how to view dot file on [your operating "
                                 "system goes here]', and install necessary programs (like .. `dot`).")

        os.remove(log_file_path)
示例#36
0
    def dry_run(self, workflow_graph_output_file_path_prefix='workflow'):
        """Not your regular dry run.

           The purpose of this function is to make sure there is a way to check for
           workflow program dependencies before the workflow is actually run. this way,
           if there is a `check_workflow_program_dependencies` call at the end of the
           snake file `get_workflow_snake_file_path(self.name)`, it can be called with
           a compiled snakemake `workflow` instance.

           When `self.save_workflow_graph` is set, `--dag` is added to the snakemake call,
           the graph is cut out of the log and written to
           `workflow_graph_output_file_path_prefix + '.dot'`, and, if `dot` is available,
           a PNG is rendered next to it. A ConfigError is raised if the log contains no
           line starting with 'digraph'.

           Nothing is done when `self.slave_mode` is set."""

        if self.slave_mode:
            return

        self.progress.new('Bleep bloop')
        self.progress.update('Quick dry run for an initial sanity check ...')
        args = ['snakemake', '--snakefile', get_workflow_snake_file_path(self.name),
                '--configfile', self.config_file, '--dryrun', '--quiet']

        if self.save_workflow_graph:
            args.append('--dag')

        log_file_path = filesnpaths.get_temp_file_path()
        u.run_command(args, log_file_path)
        self.progress.end()

        # here we're getting the graph info from the log file like a dirty hacker
        # we are (it still may be better to do it elsewhere more appropriate .. so
        # we can look more decent or whatever):
        if self.save_workflow_graph:
            dot_file_path = workflow_graph_output_file_path_prefix + '.dot'
            png_file_path = workflow_graph_output_file_path_prefix + '.png'

            # plain text mode: the 'U' flag of the former 'rU' mode was removed in
            # Python 3.11, where passing it raises ValueError
            with open(log_file_path) as log_file:
                lines = log_file.readlines()

            line_of_interest = next((line_no for line_no, line in enumerate(lines) if line.startswith('digraph')), None)
            if line_of_interest is None:
                # the log file is kept on purpose, the message points the user to it
                raise ConfigError("Oh no. Anvi'o was trying to generate a DAG output for you, but something must have\
                                   gone wrong in a step prior. Something tells anvi'o that if you take a look at the\
                                   log file here, you may be able to figure it out: '%s'. Sorry!" % log_file_path)

            with open(dot_file_path, 'w') as dot_file:
                dot_file.write(''.join(lines[line_of_interest:]))

            self.run.info('Workflow DOT file', dot_file_path)

            if u.is_program_exists('dot', dont_raise=True):
                dot_log_file = filesnpaths.get_temp_file_path()
                u.run_command(['dot', '-Tpng', dot_file_path, '-o', png_file_path], dot_log_file)
                os.remove(dot_log_file)
                self.run.info('Workflow PNG file', png_file_path)
            else:
                self.run.warning("Well, anvi'o was going to try to save a nice PNG file for your workflow\
                                  graph, but clearly you don't have `dot` installed on your system. That's OK. You\
                                  have your dot file now, and you can Google 'how to view dot file on [your operating\
                                  system goes here]', and install necessary programs (like .. `dot`).")

        os.remove(log_file_path)
示例#37
0
文件: pyani.py 项目: qclayssen/anvio
    def __init__(self,
                 args={},
                 run=terminal.Run(),
                 progress=terminal.Progress()):
        """Set up the PyANI driver from `args` and report the settings through `run`.

        `args` may be a plain dict (as the default is) or an object whose settings live
        in its `__dict__`. Recognized settings are `num_threads` (default 1), `method`
        (default 'ANIb') and `log_file` (default: a path from
        `filesnpaths.get_temp_file_path()`).

        `utils.is_program_exists` is called for 'average_nucleotide_identity.py', without
        `dont_raise`.
        """
        self.run = run
        self.progress = progress
        self.program_name = 'average_nucleotide_identity.py'
        utils.is_program_exists(self.program_name)

        # a dict has no `__dict__`, so reading `args.__dict__` unconditionally raised
        # AttributeError for the default `args={}`. The default dict is only read here,
        # never mutated, so sharing it between calls is harmless.
        given_args = args if isinstance(args, dict) else args.__dict__
        A = given_args.get
        self.num_threads = A('num_threads') or 1
        self.method = A('method') or 'ANIb'
        self.log_file_path = os.path.abspath(
            A('log_file') or filesnpaths.get_temp_file_path())

        self.run.warning(
            "Anvi'o will use 'PyANI' by Pritchard et al. (DOI: 10.1039/C5AY02550H) to compute ANI. If you publish your findings, \
                            please do not forget to properly credit their work.",
            lc='green',
            header="CITATION")

        self.run.info('[PyANI] Num threads to use', self.num_threads)
        self.run.info('[PyANI] Alignment method', self.method)
        self.run.info('[PyANI] Log file path', self.log_file_path)
示例#38
0
    def __init__(self, args=Args(), run=run, progress=progress):
        """Read the search settings from `args` and work out which search methods are usable.

        Settings that `args` does not carry are stored as None, except `search_with`,
        which falls back to 'diamond'. A ConfigError is raised when neither `diamond`
        nor `blastp` passes `utils.is_program_exists`. The COG setup is only performed
        when `args` carries at least one setting.
        """
        self.args = args
        self.run = run
        self.progress = progress

        A = lambda x: args.__dict__[x] if x in args.__dict__ else None
        self.num_threads = A('num_threads')
        self.contigs_db_path = A('contigs_db')
        self.search_with = A('search_with') or 'diamond'
        self.temp_dir_path = A('temporary_dir_path')
        self.sensitive = A('sensitive')

        self.log_file_path = None

        self.default_search_method = 'diamond'
        # search method name -> bound method that performs the search with it
        self.search_methods_factory = {
            'diamond': self.search_with_diamond,
            'blastp': self.search_with_ncbi_blast
        }
        self.available_search_methods = [
            p for p in self.search_methods_factory
            if utils.is_program_exists(p, dont_raise=True)
        ]

        if not self.available_search_methods:
            # adjacent literals keep the source indentation out of the message, and the
            # former "serach" typo is gone
            raise ConfigError(
                "None of the search methods this class could use, which include '%s', seem to be "
                "available on your system :/" % ', '.join(self.search_methods_factory))

        # fall back to whichever method is available when the preferred one is not
        if self.default_search_method not in self.available_search_methods:
            self.default_search_method = self.available_search_methods[0]

        self.hits = None  # the search function will take care of this one.

        if len(args.__dict__):
            self.COG_setup = COGsSetup(args)
            self.COG_data_dir = self.COG_setup.COG_data_dir
            self.available_db_search_program_targets = self.COG_setup.get_formatted_db_paths(
            )
            self.essential_files = self.COG_setup.get_essential_file_paths()
示例#39
0
    def check_workflow_program_dependencies(self, snakemake_workflow_object, dont_raise=False):
        """Check that the program behind every shell rule of a workflow exists.

           The first whitespace-delimited token of each rule's `shellcmd` is taken as
           the program name. The needed and missing programs are reported through
           `run` before anything is raised, so the complete list is always shown.

           `snakemake_workflow_object` must expose `rules`, an iterable of objects with
           a `shellcmd` attribute. When `dont_raise` is True the function only reports;
           otherwise a ConfigError is raised if at least one program is missing.

           Nothing is checked when `self.slave_mode` is set.
        """

        if self.slave_mode:
            return

        # a rule whose shell command is empty or whitespace-only names no program, and
        # indexing the result of `split()` on it would raise an IndexError
        shell_programs_needed = [r.shellcmd.split()[0] for r in snakemake_workflow_object.rules
                                 if r.shellcmd and r.shellcmd.strip()]

        # always probe quietly: letting `is_program_exists` raise on the first missing
        # program would skip the report below and hide every other missing program
        shell_programs_missing = [s for s in shell_programs_needed if not u.is_program_exists(s, dont_raise=True)]

        run.warning(None, 'Shell programs for the workflow')
        run.info('Needed', ', '.join(shell_programs_needed))
        run.info('Missing', ', '.join(shell_programs_missing) or 'None', nl_after=1)

        if shell_programs_missing and not dont_raise:
            raise ConfigError("This workflow will not run until those missing programs are no longer "
                              "missing :(")
示例#40
0
    def __init__(self, args=Args(), run=run, progress=progress):
        """Read the search settings from `args` and work out which search methods are usable.

        Settings that `args` does not carry are stored as None, except `search_with`,
        which falls back to 'diamond'. A ConfigError is raised when neither `diamond`
        nor `blastp` passes `utils.is_program_exists`. `self.initialize(args)` is only
        called when `args` carries at least one setting.
        """
        self.args = args
        self.run = run
        self.progress = progress

        # `.get` yields None for every setting missing from `args`
        given = args.__dict__
        self.num_threads = given.get('num_threads')
        self.contigs_db_path = given.get('contigs_db')
        self.search_with = given.get('search_with') or 'diamond'
        self.temp_dir_path = given.get('temporary_dir_path')
        self.sensitive = given.get('sensitive')

        self.log_file_path = None

        self.default_search_method = 'diamond'

        # search method name -> bound method that performs the search with it
        self.search_methods_factory = {
            'diamond': self.search_with_diamond,
            'blastp': self.search_with_ncbi_blast,
        }

        usable_methods = []
        for method in self.search_methods_factory:
            if utils.is_program_exists(method, dont_raise=True):
                usable_methods.append(method)
        self.available_search_methods = usable_methods

        if not usable_methods:
            known_methods = ', '.join(self.search_methods_factory)
            raise ConfigError("None of the search methods this class could use, which include '%s', "
                              "seem to be available on your system :/" % known_methods)

        # fall back to whichever method is available when the preferred one is not
        if self.default_search_method not in usable_methods:
            self.default_search_method = usable_methods[0]

        if given:
            self.initialize(args)
示例#41
0
文件: programs.py 项目: meren/anvio
import anvio.filesnpaths as filesnpaths

from anvio.summaryhtml import SummaryHTMLOutput
from anvio.errors import ConfigError

# module metadata; the version mirrors the one of the `anvio` package
__author__ = "Developers of anvi'o (see AUTHORS.txt)"
__copyright__ = "Copyleft 2015-2018, the Meren Lab (http://merenlab.org/)"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "*****@*****.**"
__status__ = "Development"


def G(d):
    """Return the `anvi-*` paths in directory `d` that pass `utils.is_program_exists`."""
    candidates = glob.glob(os.path.join(d, 'anvi-*'))
    return [path for path in candidates if utils.is_program_exists(path, dont_raise=True)]


def M(m):
    """Return `G` of the directory part of what `utils.is_program_exists(m)` returns."""
    return list(G(os.path.dirname(utils.is_program_exists(m))))


def S(s):
    """Return `G` of the directory part of what `utils.is_program_exists(s)` returns."""
    return list(G(os.path.dirname(utils.is_program_exists(s))))


def J(x):
    """Join the items of `x` with newlines; anything falsy gives an empty string."""
    if not x:
        return ''
    return '\n'.join(x)

# this dictionary describes all anvi'o items that are referred to by the 'requires' and
# 'provides' statements written in anvi'o programs
ANVIO_ITEMS = {'pan-db': {'name': 'PAN', 'type': 'DB', 'internal': True},
               'contigs-db': {'name': 'CONTIGS', 'type': 'DB', 'internal': True},
               'contigs-fasta': {'name': 'CONTIGS', 'type': 'FASTA', 'internal': False},
               'concatenated-gene-alignment-fasta': {'name': 'CONCATENATED GENE ALIGNMENT', 'type': 'FASTA', 'internal': False},
               'short-reads-fasta': {'name': 'SHORT READS', 'type': 'FASTA', 'internal': False},
               'genes-fasta': {'name': 'GENES', 'type': 'FASTA', 'internal': False},
               'bam-file': {'name': 'BAM FILE', 'type': 'BAM', 'internal': False},
               'protein-structure': {'name': 'PDB FILE', 'type': 'TXT', 'internal': False},
               'raw-bam-file': {'name': 'RAW BAM FILE', 'type': 'BAM', 'internal': False},
示例#42
0
from anvio.summaryhtml import SummaryHTMLOutput
from anvio.errors import ConfigError

# Module-level metadata: authorship, license, maintainer contact and development
# status. The version is taken from the `anvio` package itself.
__author__ = "Developers of anvi'o (see AUTHORS.txt)"
__copyright__ = "Copyleft 2015-2018, the Meren Lab (http://merenlab.org/)"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "*****@*****.**"
__status__ = "Development"

def G(d):
    """List the `anvi-*` entries of directory `d` that `utils.is_program_exists` accepts."""
    found = []
    for path in glob.glob(os.path.join(d, 'anvi-*')):
        if utils.is_program_exists(path, dont_raise=True):
            found.append(path)
    return found


def M(m):
    """List the `anvi-*` programs that live next to program `m`."""
    return list(G(os.path.dirname(utils.is_program_exists(m))))


def S(s):
    """List the `anvi-*` programs that live next to program `s`."""
    return list(G(os.path.dirname(utils.is_program_exists(s))))


def J(x):
    """Return the items of `x` joined by newlines, or '' when `x` is falsy."""
    return '\n'.join(x) if x else ''

# this dictionary describes all anvi'o items that are referred to by the 'requires' and
# 'provides' statements written in anvi'o programs
ANVIO_ITEMS = {
    'pan-db': {
        'name': 'PAN',
        'type': 'DB',
        'internal': True
    },
    'contigs-db': {
        'name': 'CONTIGS',
示例#43
0
    def sanity_check(self):
        """Validate the working directory, scripts folder, executable, target FASTA and parameters.

        Besides checking, this method sets `self.scripts_folder`, sets
        `self.corresponding_gene_call` to the id read from the target FASTA, and
        hands `self.directory` to `filesnpaths.gen_output_directory` when it does
        not exist yet.

        Raises:
            ModellerError: if `self.directory` exists and is not empty.
            ConfigError: if the MODELLER scripts folder is empty, if the user did not
                request a specific executable and the default one cannot be found,
                or if the target FASTA does not contain exactly one sequence.
            Helper calls (e.g. `utils.is_program_exists`) may raise on their own.
        """
        # the directory files will be dumped into (can exist but must be empty)
        if filesnpaths.is_file_exists(self.directory, dont_raise=True):
            filesnpaths.is_output_dir_writable(self.directory)
            if not filesnpaths.is_dir_empty(self.directory):
                raise ModellerError("You cannot give MODELLER a non-empty directory to work in.")
        else:
            filesnpaths.gen_output_directory(self.directory)

        # All MODELLER scripts are housed in self.scripts_folder
        self.scripts_folder = J(os.path.dirname(anvio.__file__), 'data/misc/MODELLER/scripts')
        if utils.filesnpaths.is_dir_empty(self.scripts_folder):
            # the placeholder must be filled in, otherwise the user sees a literal '{}'
            raise ConfigError("Anvi'o houses all its MODELLER scripts in {}, but your directory \
                               contains no scripts. Why you do dat?".format(self.scripts_folder))

        # check that MODELLER exists
        if self.args.__dict__.get('modeller_executable'):
            # the user explicitly asked for an executable: let a missing program fail as-is
            self.run.info_single("As per your request, anvi'o will use `%s` to run MODELLER." % self.executable, nl_before=1)
            utils.is_program_exists(self.executable)
        else:
            # no explicit request: replace the generic error with installation guidance
            try:
                utils.is_program_exists(self.executable)
            except ConfigError:
                raise ConfigError("Anvi'o needs a MODELLER program to be installed on your system. You didn't specify one\
                                   (which can be done with `--modeller-executable`), so anvi'o tried the most recent version\
                                   it knows about: '%s'. If you are certain you have it on your system (for instance you can run it\
                                   by typing '%s' in your terminal window), you may want to send a detailed bug report. If you\
                                   don't have it on your system, check out these installation instructions on our website:\
                                   http://merenlab.org/2016/06/18/installing-third-party-software/#modeller" % (self.executable, self.executable))

            self.run.info_single("Anvi'o found the default executable for MODELLER, `%s`, and will\
                                  use it." % self.executable, nl_before=1)
        self.is_executable_a_MODELLER_program()

        # does target_fasta_path point to a fasta file?
        utils.filesnpaths.is_file_fasta_formatted(self.target_fasta_path)

        # make sure target_fasta is valid
        target_fasta = u.SequenceSource(self.target_fasta_path, lazy_init=False)
        if target_fasta.total_seq != 1:
            raise ConfigError("MODELLER::The input FASTA file must have exactly one sequence.\
                               You provided one with {}.".format(target_fasta.total_seq))

        # (not sanity check but we get self.corresponding_gene_call since target_fasta is opened)
        while next(target_fasta):
            self.corresponding_gene_call = target_fasta.id
        target_fasta.close()

        # parameter consistencies
        if self.deviation < 0.5 or self.deviation > 20:
            self.run.warning("You realize that deviation is given in angstroms, right? You chose {}".format(self.deviation))

        if self.very_fast and self.num_models > 1:
            self.run.warning("Since you chose --very-fast, there will be little difference, if at all, between models. You \
                              can potentially save a lot of time by setting --num-models to 1.")

        if self.percent_identical_cutoff <= 20:
            self.run.warning("Two completely unrelated sequences of same length can expect to have around 10% proper \
                              percent identicalness... Having this parameter below 20% is probably a bad idea.")
示例#44
0
文件: fastani.py 项目: dagahren/anvio
 def check_programs(self):
     """Run `utils.is_program_exists` on the program named by `self.program_name`."""
     required_program = self.program_name
     utils.is_program_exists(required_program)
示例#45
0
 def check_programs(self):
     """Run `utils.is_program_exists` on 'diamond' and then on 'mcl'."""
     for required_program in ('diamond', 'mcl'):
         utils.is_program_exists(required_program)
示例#46
0
    def __init__(self, run=run, progress=progress):
        """Store the run/progress handlers and check that `FastTree` can be found.

        Args:
            run: run/logging handler; defaults to the module-level `run`.
            progress: progress handler; defaults to the module-level `progress`.
                It is accepted as a parameter so callers can inject their own,
                as the sibling driver classes allow.
        """
        self.run = run
        self.progress = progress
        self.command = ['FastTree']

        # check the very program the command line starts with
        utils.is_program_exists(self.command[0])
示例#47
0
    def __init__(self, run=run, progress=progress):
        """Keep the run/progress handlers and check that `metabat2` can be found."""
        self.run, self.progress = run, progress
        self.program_name = 'metabat2'

        utils.is_program_exists(self.program_name)
示例#48
0
    def __init__(self, args=Args(), run=run, progress=progress):
        """Read the search options from `args` and work out which search programs are usable.

        Args:
            args: namespace-like object whose `__dict__` holds the options; any
                option that is missing resolves to None.
            run: run/logging handler.
            progress: progress handler.

        Raises:
            ConfigError: if none of the programs in `self.search_methods_factory`
                passes `utils.is_program_exists`.
        """
        self.args = args
        self.run = run
        self.progress = progress

        # dict.get gives None for options that were not supplied
        A = args.__dict__.get
        self.num_threads = A('num_threads')
        self.contigs_db_path = A('contigs_db')
        self.search_with = A('search_with') or 'diamond'
        self.temp_dir_path = A('temporary_dir_path')
        self.sensitive = A('sensitive')

        self.log_file_path = None

        self.default_search_method = 'diamond'
        self.search_methods_factory = {'diamond': self.search_with_diamond,
                                       'blastp': self.search_with_ncbi_blast}
        self.available_search_methods = [p for p in self.search_methods_factory if utils.is_program_exists(p, dont_raise=True)]

        if not self.available_search_methods:
            raise ConfigError("None of the search methods this class could use, which include '%s', seem to be\
                               available on your system :/" % (', '.join(self.search_methods_factory)))

        # fall back to the first usable program when the preferred default is missing
        if self.default_search_method not in self.available_search_methods:
            self.default_search_method = self.available_search_methods[0]


        self.hits = None # the search function will take care of this one.

        # the COG setup is only built when at least one option was actually passed in
        if args.__dict__:
            self.COG_setup = COGsSetup(args)
            self.COG_data_dir = self.COG_setup.COG_data_dir
            self.available_db_search_program_targets = self.COG_setup.get_formatted_db_paths()
            self.essential_files = self.COG_setup.get_essential_file_paths()
示例#49
0
    def __init__(self, run=run, progress=progress):
        """Keep the run/progress handlers and check that `run_MaxBin.pl` can be found."""
        self.run, self.progress = run, progress
        self.program_name = 'run_MaxBin.pl'

        utils.is_program_exists(self.program_name)
示例#50
0
    def __init__(self, run=run, progress=progress):
        """Keep the run/progress handlers and check that `Binsanity` can be found."""
        self.run, self.progress = run, progress
        self.program_name = 'Binsanity'

        utils.is_program_exists(self.program_name)