def run_non_biosynthetic_phmms(fasta: str) -> Dict[str, List[str]]:
    """ Finds lanthipeptide-specific domains in the input fasta

        Every profile listed in data/non_biosyn_hmms/hmmdetails.txt is run
        against the input with hmmsearch, and each HSP with a bitscore above
        that profile's cutoff is recorded.

        Arguments:
            fasta: a string containing gene sequences in fasta format

        Returns:
            a dictionary mapping the hit id to a list of matching query ids,
            empty if the given fasta is empty
    """
    # Nothing to search: skip loading the profiles and launching hmmsearch.
    # The same container type as the normal path is returned so callers can
    # treat both results alike.
    if not fasta:
        return defaultdict(list)

    with open(path.get_full_path(__file__, "data", "non_biosyn_hmms", "hmmdetails.txt"), "r") as handle:
        # only lines with exactly four tab-separated fields describe a profile
        hmmdetails = [line.strip().split("\t") for line in handle if line.count("\t") == 3]
    # the third field is converted to an integer; sig.cutoff and sig.path are
    # read back below (presumably populated from the third and fourth fields,
    # confirm against HmmSignature)
    signature_profiles = [HmmSignature(details[0], details[1], int(details[2]), details[3])
                          for details in hmmdetails]

    non_biosynthetic_hmms_by_id = defaultdict(list)  # type: Dict[str, List[str]]
    for sig in signature_profiles:
        # keep only the file name of the listed path and resolve it relative
        # to this module's own data directory
        sig.path = path.get_full_path(__file__, "data", "non_biosyn_hmms",
                                      sig.path.rpartition(os.sep)[2])
        runresults = subprocessing.run_hmmsearch(sig.path, fasta)
        for runresult in runresults:
            # Store result if it is above cut-off
            for hsp in runresult.hsps:
                if hsp.bitscore > sig.cutoff:
                    non_biosynthetic_hmms_by_id[hsp.hit_id].append(hsp.query_id)
    return non_biosynthetic_hmms_by_id
def run_non_biosynthetic_phmms(cluster_fasta: str) -> Dict[str, List[HSP]]:
    """ Runs the non-biosynthetic pHMMs listed in hmmdetails.txt against the
        given sequences and gathers the hits that score above the cutoff

        Arguments:
            cluster_fasta: a string with the sequences to search, handed to
                           hmmsearch unchanged

        Returns:
            a dictionary mapping each hit id to the list of HSPs whose
            bitscore exceeded the cutoff of the profile that produced them,
            or an empty dictionary if cluster_fasta is empty
    """
    if not cluster_fasta:
        return {}

    details_path = path.get_full_path(__file__, "data", "non_biosyn_hmms", "hmmdetails.txt")
    with open(details_path, "r") as handle:
        rows = handle.read().splitlines()

    # one signature per line that has exactly four tab-separated fields
    signature_profiles = []
    for row in rows:
        if row.count("\t") != 3:
            continue
        fields = row.split("\t")
        signature_profiles.append(HmmSignature(fields[0], fields[1], int(fields[2]), fields[3]))

    hits_by_id = defaultdict(list)  # type: Dict[str, Any]
    for profile in signature_profiles:
        # the listed path is resolved relative to this module's data directory
        profile.path = path.get_full_path(__file__, "data", "non_biosyn_hmms", profile.path)
        for result in subprocessing.run_hmmsearch(profile.path, cluster_fasta):
            for hsp in result.hsps:
                # only keep hits scoring above the profile's cutoff
                if hsp.bitscore > profile.cutoff:
                    hits_by_id[hsp.hit_id].append(hsp)
    return hits_by_id