def run_on_record(record: Record, results: Optional[hmmer.HmmerResults],
                  options: ConfigType) -> hmmer.HmmerResults:
    """ Search every CDS feature of the record against PFAM with hmmer.

        Previous results are handed back unchanged when they were built from
        the same PFAM database version as the one requested now; otherwise
        the search is run again.

        Arguments:
            record: the record whose CDS features are searched
            results: results of an earlier run, if there are any
            options: the config, read for 'fullhmmer_pfamdb_version'
                     and 'database_dir'

        Returns:
            the reused previous results, or the results of a fresh
            hmmer.run_hmmer() call
    """
    # "latest" is a placeholder that has to be resolved to a real version
    database_version = (
        pfamdb.find_latest_database_version(options.database_dir)
        if options.fullhmmer_pfamdb_version == "latest"
        else options.fullhmmer_pfamdb_version
    )

    if results:
        previous_db = pfamdb.get_db_version_from_path(results.database)
        # nothing to do if the old results already match the requested version
        if database_version == previous_db:
            return results
        logging.debug("Replacing fullhmmer results from %s with %s",
                      previous_db, database_version)

    logging.info('Running whole-genome PFAM search')
    hmm_path = os.path.join(options.database_dir, 'pfam', database_version, 'Pfam-A.hmm')
    return hmmer.run_hmmer(record, record.get_cds_features(),
                           MAX_EVALUE, MIN_SCORE, hmm_path, "fullhmmer")
def run_on_record(record: Record, results: Optional[hmmer.HmmerResults],
                  options: ConfigType) -> hmmer.HmmerResults:
    """ Search the CDS features inside the record's regions against PFAM.

        Previous results are handed back unchanged when they were built from
        the same PFAM database version as the one requested now; otherwise
        the search is run again.

        Arguments:
            record: the record whose regions supply the CDS features
            results: results of an earlier run, if there are any
            options: the config, read for 'clusterhmmer_pfamdb_version'
                     and 'database_dir'

        Returns:
            the reused previous results, or the results of a fresh
            hmmer.run_hmmer() call
    """
    # "latest" is a placeholder that has to be resolved to a real version
    database_version = (
        pfamdb.find_latest_database_version(options.database_dir)
        if options.clusterhmmer_pfamdb_version == "latest"
        else options.clusterhmmer_pfamdb_version
    )

    if results:
        previous_db = pfamdb.get_db_version_from_path(results.database)
        # nothing to do if the old results already match the requested version
        if database_version == previous_db:
            return results
        logging.debug("Replacing clusterhmmer results from %s with %s",
                      previous_db, database_version)

    logging.info('Running cluster PFAM search')

    # only CDS features that sit inside a region are searched
    region_cdses = [cds for region in record.get_regions() for cds in region.cds_children]

    hmm_path = os.path.join(options.database_dir, 'pfam', database_version, 'Pfam-A.hmm')
    return hmmer.run_hmmer(record, region_cdses, MAX_EVALUE, MIN_SCORE,
                           hmm_path, "clusterhmmer")
def run_on_record(record, results, options) -> hmmer.HmmerResults:
    """ Search every CDS feature of the record against PFAM with hmmer.

        Arguments:
            record: the record whose CDS features are searched
            results: results of an earlier run; if truthy they are
                     returned as-is and no search is performed
            options: the config, read for 'fullhmmer_pfamdb_version'
                     and 'database_dir'

        Returns:
            the given results if present, otherwise the results of a
            fresh hmmer.run_hmmer() call
    """
    # existing results are always reused, whatever database produced them
    if results:
        return results

    logging.info('Running whole-genome PFAM search')

    # "latest" is a placeholder that has to be resolved to a real version
    database_version = (
        pfamdb.find_latest_database_version(options.database_dir)
        if options.fullhmmer_pfamdb_version == "latest"
        else options.fullhmmer_pfamdb_version
    )
    hmm_path = os.path.join(options.database_dir, 'pfam', database_version, 'Pfam-A.hmm')

    return hmmer.run_hmmer(record, record.get_cds_features(),
                           MAX_EVALUE, MIN_SCORE, hmm_path, "fullhmmer")
def run_rrefinder(record: Record, bitscore_cutoff: float, min_length: int,
                  database: str) -> RREFinderResults:
    """ Run RREFinder on a given record

        Arguments:
            record: the record to search for RRE candidates
            bitscore_cutoff: passed to run_hmmer() as the minimum score and
                             to filter_hits(), and stored in the results
            min_length: passed to filter_hits() and stored in the results
            database: the HMM database given to run_hmmer()

        Returns:
            an RREFinderResults instance for the record, with empty hit
            mappings when there were no candidates to search
    """
    candidates_by_protocluster, cds_info = gather_rre_candidates(record)

    # without any candidates there is nothing to hand to hmmer,
    # so report empty hits straight away
    if not cds_info:
        return RREFinderResults(record.id, bitscore_cutoff, min_length, {}, {})

    hmm_results = run_hmmer(
        record,
        cds_info.values(),
        max_evalue=1,
        min_score=bitscore_cutoff,
        database=database,
        tool='rrefinder',
        use_cut_tc=False,
        filter_overlapping=False,
    )

    # pull the hits out of the raw results, then filter them
    by_cds, by_protocluster = filter_hits(
        extract_rre_hits(hmm_results),
        candidates_by_protocluster,
        min_length,
        bitscore_cutoff,
    )
    return RREFinderResults(record.id, bitscore_cutoff, min_length,
                            by_protocluster, by_cds)
def run_on_record(record, results, options) -> hmmer.HmmerResults:
    """ Search the CDS features inside the record's clusters against PFAM.

        Arguments:
            record: the record whose clusters supply the CDS features
            results: results of an earlier run; if truthy they are
                     returned as-is and no search is performed
            options: the config, read for 'clusterhmmer_pfamdb_version'
                     and 'database_dir'

        Returns:
            the given results if present, otherwise the results of a
            fresh hmmer.run_hmmer() call
    """
    # existing results are always reused, whatever database produced them
    if results:
        return results

    logging.info('Running cluster PFAM search')

    # "latest" is a placeholder that has to be resolved to a real version
    database_version = (
        pfamdb.find_latest_database_version(options.database_dir)
        if options.clusterhmmer_pfamdb_version == "latest"
        else options.clusterhmmer_pfamdb_version
    )
    hmm_path = os.path.join(options.database_dir, 'pfam', database_version, 'Pfam-A.hmm')

    # only CDS features that sit inside a cluster are searched
    cluster_cdses = [cds for cluster in record.get_clusters() for cds in cluster.cds_children]

    return hmmer.run_hmmer(record, cluster_cdses, MAX_EVALUE, MIN_SCORE,
                           hmm_path, "clusterhmmer")
def run_on_record(record: Record, results: Optional[TIGRFamResults],
                  options: ConfigType) -> TIGRFamResults:
    """ Search the CDS features inside the record's regions against TIGRFam.

        Arguments:
            record: the record whose regions supply the CDS features
            results: results of an earlier run; if truthy they are
                     returned as-is and no search is performed
            options: the config, read for 'database_dir'

        Returns:
            the given results if present, otherwise a TIGRFamResults
            instance built from a fresh hmmer.run_hmmer() call
    """
    # reuse existing results before logging anything, so the log only
    # announces a search when one is actually performed
    if results:
        return results

    logging.info('Running TIGRFam search')

    # only CDS features that sit inside a region are searched
    features = [cds for region in record.get_regions() for cds in region.cds_children]

    tigr_db = os.path.join(options.database_dir, "tigrfam", "TIGRFam.hmm")
    hmmer_results = hmmer.run_hmmer(record, features, MAX_EVALUE, MIN_SCORE,
                                    tigr_db, "tigrfam", filter_overlapping=False)
    return TIGRFamResults.from_hmmer_results(hmmer_results)
def run_rrefinder(record: Record, bitscore_cutoff: float, min_length: int,
                  database: str) -> RREFinderResults:
    """ Run RREFinder on a given record

        Arguments:
            record: the record to search for RRE candidates
            bitscore_cutoff: passed to run_hmmer() as the minimum score and
                             to filter_hits(), and stored in the results
            min_length: passed to filter_hits() and stored in the results
            database: the HMM database given to run_hmmer()

        Returns:
            an RREFinderResults instance for the record, with empty hit
            mappings when there were no candidates to search
    """
    # Gather all RRE candidates
    candidates_per_protocluster, cds_info = gather_rre_candidates(record)

    # With no candidate CDS features there is nothing to search; skip hmmer
    # entirely rather than invoking it on an empty query.
    # NOTE(review): assumes filter_hits() would also yield two empty mappings
    # for empty input - confirm against its implementation.
    if not cds_info:
        return RREFinderResults(record.id, bitscore_cutoff, min_length, {}, {})

    # Run hmmscan on the candidates and gather the hits
    hmm_results = run_hmmer(record, cds_info.values(), max_evalue=1,
                            min_score=bitscore_cutoff, database=database,
                            tool='rrefinder', use_cut_tc=False)

    # Extract the RRE hits
    hit_info = extract_rre_hits(hmm_results)

    # Filter the hits
    filtered_hit_info, filtered_hits_per_protocluster = filter_hits(
        hit_info, candidates_per_protocluster, min_length, bitscore_cutoff)

    return RREFinderResults(record.id, bitscore_cutoff, min_length,
                            filtered_hits_per_protocluster, filtered_hit_info)