示例#1
0
    def is_database_exists(self):
        """Verify that the Pfam data used by InteracDome is available.

        Overrides Pfam.is_database_exists. The parent check is run as is; if it raises a
        ConfigError, that error is replaced with a ConfigError whose message points the user
        to 'anvi-setup-interacdome'. Nothing is returned when the check passes.
        """

        missing_data_message = ("It seems you do not have the associated Pfam data required to use "
                                "InteracDome, please run 'anvi-setup-interacdome' to download it. Then "
                                "run this command again.")

        try:
            Pfam.is_database_exists(self)
        except ConfigError:
            # Swap the parent's error for one that names the InteracDome setup program
            raise ConfigError(missing_data_message)
示例#2
0
    def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
        """Read run parameters, load the InteracDome table, and initialize the Pfam baseclass.

        Parameters
        ==========
        args : argparse.Namespace (or any object exposing `__dict__`)
            The attributes read here are `interacdome_data_dir`, `information_content_cutoff`,
            `min_binding_frequency`, `min_hit_fraction`, `interacdome_dataset`,
            `output_file_prefix`, and `just_do_it`. Any of them may be missing or None, in which
            case the default assigned below is used. NOTE: `args` is modified in place
            (`hmmer_program` and `pfam_data_dir` are set) before it is passed to Pfam.__init__.

        run : terminal.Run
            Used for all warnings and info lines printed during initialization.

        progress : terminal.Progress
            Stored on the instance and passed on to Pfam.__init__.
        """

        self.run = run
        self.progress = progress

        self.run.warning("Anvi'o will use 'InteracDome' by Kobren and Singh (DOI: 10.1093/nar/gky1224) to attribute binding frequencies. "
                         "If you publish your findings, please do not forget to properly credit their work.", lc='green', header="CITATION")

        def from_args(name, default=None):
            """Return the `name` attribute of args, or `default` if it is absent or None"""
            value = args.__dict__.get(name)
            return default if value is None else value

        # For paths and names an empty string is as good as missing, so `or` is the right fallback
        self.interacdome_data_dir = from_args('interacdome_data_dir') or constants.default_interacdome_data_path
        self.interacdome_dataset = from_args('interacdome_dataset') or 'representable'

        # Numeric thresholds must NOT use `or`: 0 is falsy, so an explicitly requested 0 would be
        # silently replaced by the default. Only fall back when the value is absent or None
        self.information_content_cutoff = from_args('information_content_cutoff', 4)
        self.min_binding_frequency = from_args('min_binding_frequency', 0)
        self.min_hit_fraction = from_args('min_hit_fraction', 0.8)

        self.output_prefix = from_args('output_file_prefix')
        self.just_do_it = from_args('just_do_it')

        self.run.warning("", header='INITIALIZATION', lc='green')
        self.run.info("Interacdome dataset used", self.interacdome_dataset)
        self.run.info("Minimum hit fraction", self.min_hit_fraction)

        self.hmm_filepath = os.path.join(self.interacdome_data_dir, 'Pfam-A.hmm')

        # Init the InteracDome table
        self.interacdome_table = InteracDomeTableData(kind=self.interacdome_dataset, interacdome_data_dir=self.interacdome_data_dir)
        self.interacdome_table.load()

        # Init the Pfam baseclass. These two attributes are written onto the caller's args object
        args.hmmer_program = 'hmmsearch' # Force use of hmmsearch
        args.pfam_data_dir = self.interacdome_data_dir
        Pfam.__init__(self, args, run=self.run, progress=self.progress)

        # Init contigs database. self.contigs_db_path is not assigned in this method; presumably
        # it is set by Pfam.__init__ above -- TODO confirm
        contigs_args = argparse.Namespace(contigs_db=self.contigs_db_path)
        self.contigs_db = dbops.ContigsSuperclass(contigs_args)

        self.potentially_remove_previous_interacdome_data()

        # Init the HMM profile
        self.hmms = pfam.HMMProfile(self.hmm_filepath)

        # This dictionary is populated and cast as a dataframe. It contains all of the per-residue
        # binding frequency information for each hit
        self.bind_freq = {}

        # This dictionary (eventual dataframe) is just like self.bind_freq, except has averaged
        # binding frequencies for residue-ligand combos that have multiple contributing hits. It
        # also drops all contributing match state information
        self.avg_bind_freq = {}

        # This is a modified version of self.avg_bind_freq that is compatible with the
        # amino_acid_additional_data table structure, i.e.
        # tables.amino_acid_additional_data_table_structure
        self.amino_acid_additional_data = {}