def test_find_latest(self):
    """ Check that find_latest_database_version ignores directories that are
        not plain version numbers and reports the highest version present
    """
    import re  # local import: only needed to escape the temp path below

    with tempfile.TemporaryDirectory(prefix="aS.pfamdbtest") as temp_db_layout:
        pfam_dir = os.path.join(temp_db_layout, "pfam")

        # with only these directories present, the lookup is expected to fail
        for name in ("30.7invalid", "invalid30.7", "irrelevant"):
            os.makedirs(os.path.join(pfam_dir, name))

        # the path is interpolated into a regex, so it has to be escaped to
        # be matched literally (e.g. the '.' in the prefix)
        expected_message = ("No matching PFAM database in location "
                            + re.escape(temp_db_layout))
        with self.assertRaisesRegex(Exception, expected_message):
            pfamdb.find_latest_database_version(temp_db_layout)

        # each step adds one version directory and states which version
        # must be reported afterwards; adding an older one changes nothing
        for added, expected_latest in (("31.0", "31.0"),
                                       ("31.2", "31.2"),
                                       ("30.7", "31.2")):
            os.makedirs(os.path.join(pfam_dir, added))
            assert pfamdb.find_latest_database_version(temp_db_layout) == expected_latest
def run_on_record(record: Record, results: Optional[hmmer.HmmerResults],
                  options: ConfigType) -> hmmer.HmmerResults:
    """ Run the PFAM search over the CDS features of every region in the record

        Arguments:
            record: the record whose regions supply the CDS features
            results: results of a previous run, if any
            options: the configuration, supplying the database directory and
                     the requested PFAM database version

        Returns:
            the given results if they were built with the requested database
            version, otherwise whatever hmmer.run_hmmer returns for a new run
    """
    version = options.clusterhmmer_pfamdb_version
    if version == "latest":
        version = pfamdb.find_latest_database_version(options.database_dir)

    if results:
        previous_version = pfamdb.get_db_version_from_path(results.database)
        if version != previous_version:
            logging.debug("Replacing clusterhmmer results from %s with %s",
                          previous_version, version)
        else:
            # the existing results already come from the requested version
            return results

    logging.info('Running cluster PFAM search')

    region_cdses = [cds for region in record.get_regions()
                    for cds in region.cds_children]
    hmm_file = os.path.join(options.database_dir, 'pfam', version, 'Pfam-A.hmm')
    return hmmer.run_hmmer(record, region_cdses, MAX_EVALUE, MIN_SCORE,
                           hmm_file, "clusterhmmer")
def run_on_record(record: Record, results: Optional[hmmer.HmmerResults],
                  options: ConfigType) -> hmmer.HmmerResults:
    """ Run the PFAM search over all CDS features of the record

        Arguments:
            record: the record supplying the CDS features
            results: results of a previous run, if any
            options: the configuration, supplying the database directory and
                     the requested PFAM database version

        Returns:
            the given results if they were built with the requested database
            version, otherwise whatever hmmer.run_hmmer returns for a new run
    """
    version = options.fullhmmer_pfamdb_version
    if version == "latest":
        version = pfamdb.find_latest_database_version(options.database_dir)

    if results:
        previous_version = pfamdb.get_db_version_from_path(results.database)
        # nothing to redo when the versions agree
        if version == previous_version:
            return results
        logging.debug("Replacing fullhmmer results from %s with %s",
                      previous_version, version)

    logging.info('Running whole-genome PFAM search')

    hmm_file = os.path.join(options.database_dir, 'pfam', version, 'Pfam-A.hmm')
    return hmmer.run_hmmer(record, record.get_cds_features(), MAX_EVALUE,
                           MIN_SCORE, hmm_file, "fullhmmer")
def setUp(self):
    """ Override the cluster_hmmer thresholds, build an isolated config and
        install mocks for file location and hmmscan
    """
    # keep the module-level thresholds before overriding them
    # NOTE(review): presumably restored in tearDown, which is not visible here
    self._old_max_evalue = cluster_hmmer.MAX_EVALUE
    self._old_min_score = cluster_hmmer.MIN_SCORE
    cluster_hmmer.MAX_EVALUE = 0.02
    cluster_hmmer.MIN_SCORE = 1.
    self.config = build_config([], isolated=True, modules=antismash.get_all_modules())
    self.latest_pfam = pfamdb.find_latest_database_version(self.config.database_dir)
    # records the calls made to the mocked locate_file
    self.tracer = TraceTracker()
    # the values handed to the locate_file mock via returns_iter
    self.file_list = ['Pfam-A.hmm', 'Pfam-A.hmm.h3f', 'Pfam-A.hmm.h3i', 'Pfam-A.hmm.h3m', 'Pfam-A.hmm.h3p']
    mock('antismash.common.path.locate_file', returns_iter=self.file_list, tracker=self.tracer)
    mock('antismash.common.subprocessing.run_hmmscan', returns=[])
    # {0} is filled with the database directory and {1} with the latest PFAM version
    # NOTE(review): the whitespace inside this literal is reproduced exactly
    # as it appears in this view; confirm whether it originally held line breaks
    self.expected_trace = """Called antismash.common.path.locate_file( '{0}/pfam/{1}/Pfam-A.hmm') Called antismash.common.path.locate_file( '{0}/pfam/{1}/Pfam-A.hmm.h3f') Called antismash.common.path.locate_file( '{0}/pfam/{1}/Pfam-A.hmm.h3i') Called antismash.common.path.locate_file( '{0}/pfam/{1}/Pfam-A.hmm.h3m') Called antismash.common.path.locate_file( '{0}/pfam/{1}/Pfam-A.hmm.h3p')""".format(self.config.database_dir, self.latest_pfam)
def check_options(options: ConfigType) -> List[str]:
    """ Check the requested PFAM database exists

        Arguments:
            options: the configuration, supplying the database directory and
                     the requested PFAM database version

        Returns:
            a list of problem descriptions: the messages from pfamdb.check_db
            for the chosen version directory, or the lookup error message
            when "latest" was requested and no version could be found
    """
    database_version = options.fullhmmer_pfamdb_version
    pfam_dir = os.path.join(options.database_dir, "pfam")
    if database_version == "latest":
        try:
            database_version = pfamdb.find_latest_database_version(options.database_dir)
        except ValueError as err:
            # report a missing database as a message, as check_prereqs does,
            # instead of letting the exception escape an option check
            return [str(err)]
    return pfamdb.check_db(os.path.join(pfam_dir, database_version))
def setUp(self):
    """ Override the full_hmmer thresholds and build an isolated config """
    # keep the module-level thresholds before overriding them
    self._old_max_evalue = full_hmmer.MAX_EVALUE
    self._old_min_score = full_hmmer.MIN_SCORE
    full_hmmer.MAX_EVALUE = 0.02
    full_hmmer.MIN_SCORE = 1.0

    all_modules = antismash.get_all_modules()
    self.config = build_config([], isolated=True, modules=all_modules)

    database_dir = self.config.database_dir
    self.latest_pfam = pfamdb.find_latest_database_version(database_dir)
def run_on_record(record, results, options) -> hmmer.HmmerResults:
    """ Run the PFAM search over all CDS features of the record

        Arguments:
            record: the record supplying the CDS features
            results: results of a previous run, if any
            options: the configuration, supplying the database directory and
                     the requested PFAM database version

        Returns:
            the given results if there are any, otherwise whatever
            hmmer.run_hmmer returns for a new run
    """
    # existing results are reused as-is, without looking at their database
    if results:
        return results

    logging.info('Running whole-genome PFAM search')

    version = options.fullhmmer_pfamdb_version
    if version == "latest":
        version = pfamdb.find_latest_database_version(options.database_dir)

    hmm_file = os.path.join(options.database_dir, 'pfam', version, 'Pfam-A.hmm')
    return hmmer.run_hmmer(record, record.get_cds_features(), MAX_EVALUE,
                           MIN_SCORE, hmm_file, "fullhmmer")
def check_prereqs() -> List[str]:
    """ Check that the required executable can be located and that the
        latest PFAM database exists and is valid

        Returns:
            a list of problem descriptions, empty if nothing was found
    """
    problems = []

    for executable in ['hmmscan']:
        if path.locate_executable(executable):
            continue
        problems.append("Failed to locate executable: %r" % executable)

    database_root = get_config().database_dir
    try:
        latest = pfamdb.find_latest_database_version(database_root)
    except ValueError as err:
        # without any version there is no database directory to check
        problems.append(str(err))
    else:
        latest_dir = os.path.join(database_root, "pfam", latest)
        problems.extend(pfamdb.check_db(latest_dir))
    return problems
def run_on_record(record, results, options) -> hmmer.HmmerResults:
    """ Run the PFAM search over the CDS features of every cluster in the record

        Arguments:
            record: the record whose clusters supply the CDS features
            results: results of a previous run, if any
            options: the configuration, supplying the database directory and
                     the requested PFAM database version

        Returns:
            the given results if there are any, otherwise whatever
            hmmer.run_hmmer returns for a new run
    """
    # existing results are reused as-is, without looking at their database
    if results:
        return results

    logging.info('Running cluster PFAM search')

    version = options.clusterhmmer_pfamdb_version
    if version == "latest":
        version = pfamdb.find_latest_database_version(options.database_dir)
    hmm_file = os.path.join(options.database_dir, 'pfam', version, 'Pfam-A.hmm')

    cluster_cdses = [cds for cluster in record.get_clusters()
                     for cds in cluster.cds_children]
    return hmmer.run_hmmer(record, cluster_cdses, MAX_EVALUE, MIN_SCORE,
                           hmm_file, "clusterhmmer")