def __init__(self, args, run=run, progress=progress):
    """Prepare a Pfam annotation run against a contigs database.

    Optional settings are read from `args` only when the attribute is
    present on it; a missing attribute is treated as None. The HMMER
    program falls back to 'hmmsearch' and the Pfam data directory falls
    back to the copy under the anvi'o package directory.

    Parameters
    ==========
    args : object
        Namespace-like object; `contigs_db`, `num_threads`,
        `hmmer_program` and `pfam_data_dir` are looked up in its
        `__dict__`.
    run, progress : objects
        Reporting helpers stored on the instance.
    """
    self.run = run
    self.progress = progress

    def optional_arg(name):
        # a missing attribute is not an error here, it simply yields None
        supplied = args.__dict__
        return supplied[name] if name in supplied else None

    self.contigs_db_path = optional_arg('contigs_db')
    self.num_threads = optional_arg('num_threads')
    self.hmm_program = optional_arg('hmmer_program') or 'hmmsearch'
    self.pfam_data_dir = optional_arg('pfam_data_dir')

    # load_catalog will populate this
    self.function_catalog = {}

    # sanity checks come before anything touches the Pfam data
    filesnpaths.is_program_exists(self.hmm_program)
    utils.is_contigs_db(self.contigs_db_path)

    if not self.pfam_data_dir:
        anvio_root = os.path.dirname(anvio.__file__)
        self.pfam_data_dir = os.path.join(anvio_root, 'data/misc/Pfam')

    # here, in the process of checking whether Pfam has been downloaded into the pfam_data_dir,
    # we also decompress and hmmpress the profile if it is currently gzipped
    self.is_database_exists()

    self.run.info('Pfam database directory', self.pfam_data_dir)

    self.get_version()
    self.load_catalog()
def __init__(self, args, run=run, progress=progress):
    """Prepare the Pfam setup: validate inputs and create the data directory.

    Refuses to combine `--reset` with a user-supplied `--pfam-data-dir`;
    otherwise resolves the data directory (defaulting to the copy under
    the anvi'o package directory), checks that its parent is writable,
    and generates the directory.

    Parameters
    ==========
    args : object
        Must expose `pfam_data_dir` and `reset`.
    run, progress : objects
        Reporting helpers stored on the instance.

    Raises
    ======
    ConfigError
        When `args.reset` is set together with a non-default data directory.
    """
    self.args, self.run, self.progress = args, run, progress
    self.pfam_data_dir = args.pfam_data_dir

    filesnpaths.is_program_exists('hmmpress')

    # a truthy value at this point can only have come from the user
    user_chose_directory = bool(self.pfam_data_dir)
    if user_chose_directory and args.reset:
        raise ConfigError(
            "You are attempting to run Pfam setup on a non-default data directory (%s) using the --reset flag. "
            "To avoid automatically deleting a directory that may be important to you, anvi'o refuses to reset "
            "directories that have been specified with --pfam-data-dir. If you really want to get rid of this "
            "directory and regenerate it with Pfam data inside, then please remove the directory yourself using "
            "a command like `rm -r %s`. We are sorry to make you go through this extra trouble, but it really is "
            "the safest way to handle things." % (self.pfam_data_dir, self.pfam_data_dir))

    if not user_chose_directory:
        anvio_root = os.path.dirname(anvio.__file__)
        self.pfam_data_dir = os.path.join(anvio_root, 'data/misc/Pfam')

    parent_directory = os.path.dirname(self.pfam_data_dir)
    filesnpaths.is_output_dir_writable(parent_directory)

    # the existence check is skipped when resetting or when running in debug mode
    if not (args.reset or anvio.DEBUG):
        self.is_database_exists()

    filesnpaths.gen_output_directory(self.pfam_data_dir, delete_if_exists=args.reset)

    self.database_url = "http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release"
    self.files = ['Pfam-A.hmm.gz', 'Pfam.version.gz', 'Pfam-A.clans.tsv.gz']
def __init__(self, db_path, num_threads_to_use=1, run=run, progress=progress, initializing_for_deletion=False,
             just_do_it=False, hmm_program_to_use='hmmscan', hmmer_output_directory=None,
             get_domain_table_output=False):
    """Set up access to the HMM hits tables of a contigs database.

    Parameters
    ==========
    db_path : str
        Path to the contigs database; validated with `utils.is_contigs_db`.
    num_threads_to_use : int
        Stored as-is on the instance.
    run, progress : objects
        Reporting helpers handed to `Table.__init__`.
    initializing_for_deletion : bool
        When True, the next available id for the HMM hits table is not set.
    just_do_it : bool
        Stored as-is on the instance.
    hmm_program_to_use : str
        HMMER program name; a falsy value falls back to 'hmmscan'.
    hmmer_output_directory : str or None
        Stored as-is on the instance.
    get_domain_table_output : bool
        When True, both 'table' and 'domtable' outputs are requested;
        otherwise only 'table'.
    """
    self.db_path = db_path
    self.num_threads_to_use = num_threads_to_use
    self.just_do_it = just_do_it
    self.hmmer_output_dir = hmmer_output_directory
    self.hmm_program = hmm_program_to_use or 'hmmscan'

    if get_domain_table_output:
        self.hmmer_desired_output = ('table', 'domtable')
    else:
        self.hmmer_desired_output = 'table'

    utils.is_contigs_db(self.db_path)
    filesnpaths.is_program_exists(self.hmm_program)

    required_version = utils.get_required_version_for_db(self.db_path)
    self.contigs_db_hash = db.DB(self.db_path, required_version).get_meta_value('contigs_db_hash')

    Table.__init__(self, self.db_path, anvio.__contigs__version__, run, progress)

    self.init_gene_calls_dict()

    # an empty gene calls dict is tolerated, but the user gets told why it may matter
    if len(self.gene_calls_dict) == 0:
        if self.genes_are_called:
            warning_text = (
                "Tables in this contigs database that should contain gene calls are empty despite the fact that "
                "you didn't skip the gene calling step while generating this contigs database. This probably means "
                "that the gene caller did not find any genes among contigs. This is OK for now. But might explode "
                "later. If it does explode and you decide to let us know about that problem, please remember to mention "
                "this warning. By the way, this warning probably has been seen by like only 2 people on the planet. Who "
                "works with contigs with no gene calls? A better implementation of anvi'o will unite researchers who "
                "study weird stuff.")
        else:
            warning_text = (
                "It seems you have skipped gene calling step while generating your contigs database, and you have no "
                "genes calls in tables that should contain gene calls. Anvi'o will let you go with this since some HMM "
                "sources only operate on DNA sequences, and at this point it doesn't know which HMMs you wish to run. "
                "If the lack of genes causes a problem, you will get another error message later probably :/")

        self.run.warning(warning_text)

    if not initializing_for_deletion:
        self.set_next_available_id(t.hmm_hits_table_name)
def __init__(self, args, run=run, progress=progress):
    """Prepare the Pfam setup and create the data directory.

    The data directory comes from `args.pfam_data_dir`, falling back to
    the copy under the anvi'o package directory. Unless `args.reset` is
    set, `is_database_exists` is consulted before the directory is
    generated.

    Parameters
    ==========
    args : object
        Must expose `pfam_data_dir` and `reset`.
    run, progress : objects
        Reporting helpers stored on the instance.
    """
    self.args, self.run, self.progress = args, run, progress
    self.pfam_data_dir = args.pfam_data_dir

    filesnpaths.is_program_exists('hmmpress')

    if not self.pfam_data_dir:
        anvio_root = os.path.dirname(anvio.__file__)
        self.pfam_data_dir = os.path.join(anvio_root, 'data/misc/Pfam')

    resetting = args.reset
    if not resetting:
        self.is_database_exists()

    # NOTE(review): `delete_if_exists` receives the reset flag even for a
    # user-supplied --pfam-data-dir; presumably that removes the directory --
    # confirm this is intended.
    filesnpaths.gen_output_directory(self.pfam_data_dir, delete_if_exists=resetting)

    self.database_url = "http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release"
    self.files = ['Pfam-A.hmm.gz', 'Pfam.version.gz', 'Pfam-A.clans.tsv.gz']
def __init__(self, args, run=run, progress=progress):
    """Initialize Pfam setup state and make sure the data directory exists.

    Uses `args.pfam_data_dir` when given, otherwise the Pfam directory
    under the anvi'o package. `is_database_exists` is only called when
    `args.reset` is falsy; the directory is then generated.

    Parameters
    ==========
    args : object
        Must expose `pfam_data_dir` and `reset`.
    run, progress : objects
        Reporting helpers stored on the instance.
    """
    self.args = args
    self.run = run
    self.progress = progress
    self.pfam_data_dir = args.pfam_data_dir

    filesnpaths.is_program_exists('hmmpress')

    # fall back to the data directory that lives next to the anvi'o package
    package_dir = os.path.dirname(anvio.__file__)
    self.pfam_data_dir = self.pfam_data_dir or os.path.join(package_dir, 'data/misc/Pfam')

    if not args.reset:
        self.is_database_exists()

    # NOTE(review): a user-supplied --pfam-data-dir is passed through with
    # delete_if_exists=True when --reset is given -- confirm that deleting
    # it is the desired outcome.
    filesnpaths.gen_output_directory(self.pfam_data_dir, delete_if_exists=args.reset)

    self.database_url = "http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release"

    remote_files = []
    remote_files.append('Pfam-A.hmm.gz')
    remote_files.append('Pfam.version.gz')
    remote_files.append('Pfam-A.clans.tsv.gz')
    self.files = remote_files
def __init__(self, args, run=run, progress=progress):
    """Prepare a Pfam annotation run that relies on `hmmscan`.

    Reads the contigs database path, thread count and Pfam data
    directory from `args`, validates the program and the database, and
    then loads the Pfam version and function catalog.

    Parameters
    ==========
    args : object
        Must expose `contigs_db`, `num_threads` and `pfam_data_dir`.
    run, progress : objects
        Reporting helpers stored on the instance.
    """
    self.args, self.run, self.progress = args, run, progress

    self.contigs_db_path = args.contigs_db
    self.num_threads = args.num_threads
    self.pfam_data_dir = args.pfam_data_dir

    # load_catalog will populate this
    self.function_catalog = {}

    filesnpaths.is_program_exists('hmmscan')
    utils.is_contigs_db(self.contigs_db_path)

    # no directory given: use the one that lives next to the anvi'o package
    if not self.pfam_data_dir:
        anvio_root = os.path.dirname(anvio.__file__)
        self.pfam_data_dir = os.path.join(anvio_root, 'data/misc/Pfam')

    self.is_database_exists()

    self.run.info('Pfam database directory', self.pfam_data_dir)

    self.get_version()
    self.load_catalog()
def check_sge_binaries(self):
    """Check for the `qsub` and `qstat` programs, in that order."""
    for binary_name in ('qsub', 'qstat'):
        filesnpaths.is_program_exists(binary_name)