Пример #1
0
def run_non_biosynthetic_phmms(fasta: str) -> Dict[str, List[str]]:
    """ Runs every profile listed in data/non_biosyn_hmms/hmmdetails.txt
        against the input fasta and gathers the hits scoring above each
        profile's cutoff

        Arguments:
            fasta: a string containing gene sequences in fasta format

        Returns:
            a dictionary mapping the hit id to a list of matching query ids,
            empty if the fasta string itself is empty
    """
    # nothing to search: skip reading the profile list and launching hmmsearch
    if not fasta:
        return {}

    details_path = path.get_full_path(__file__, "data", "non_biosyn_hmms",
                                      "hmmdetails.txt")
    hmmdetails = []  # type: List[List[str]]
    with open(details_path, "r") as handle:
        for line in handle:
            # only lines with exactly four tab-separated columns are profiles
            if line.count("\t") != 3:
                continue
            fields = line.strip().split("\t")
            # strip() can swallow a leading or trailing tab (e.g. an empty
            # last column), leaving fewer fields than the tab count implied;
            # skip such lines instead of failing on a missing index below
            if len(fields) != 4:
                continue
            hmmdetails.append(fields)

    signature_profiles = [
        HmmSignature(details[0], details[1], int(details[2]), details[3])
        for details in hmmdetails
    ]
    non_biosynthetic_hmms_by_id = defaultdict(
        list)  # type: Dict[str, List[str]]
    for sig in signature_profiles:
        # keep only the file name of the signature's path and resolve it
        # inside this module's non_biosyn_hmms data directory
        sig.path = path.get_full_path(__file__, "data", "non_biosyn_hmms",
                                      sig.path.rpartition(os.sep)[2])
        runresults = subprocessing.run_hmmsearch(sig.path, fasta)
        for runresult in runresults:
            # Store result if it is above cut-off
            for hsp in runresult.hsps:
                if hsp.bitscore > sig.cutoff:
                    non_biosynthetic_hmms_by_id[hsp.hit_id].append(
                        hsp.query_id)
    return non_biosynthetic_hmms_by_id
Пример #2
0
def run_non_biosynthetic_phmms(cluster_fasta: str) -> Dict[str, List[HSP]]:
    """ Runs every profile listed in data/non_biosyn_hmms/hmmdetails.txt
        against the given fasta and gathers the HSPs scoring above each
        profile's cutoff

        Arguments:
            cluster_fasta: the fasta content to search, as a single string

        Returns:
            a dictionary mapping the hit id to the list of HSPs found for it,
            an empty dictionary if cluster_fasta is empty
    """
    if not cluster_fasta:
        return {}

    details_file = path.get_full_path(__file__, "data", "non_biosyn_hmms",
                                      "hmmdetails.txt")
    rows = []
    with open(details_file, "r") as handle:
        for raw_line in handle.read().splitlines():
            # a profile line has exactly four tab-separated columns
            if raw_line.count("\t") == 3:
                rows.append(raw_line.split("\t"))

    profiles = []
    for row in rows:
        profiles.append(HmmSignature(row[0], row[1], int(row[2]), row[3]))

    hits_by_id = defaultdict(list)  # type: Dict[str, Any]
    for profile in profiles:
        # resolve the profile's file inside this module's data directory
        profile.path = path.get_full_path(__file__, "data", "non_biosyn_hmms",
                                          profile.path)
        for result in subprocessing.run_hmmsearch(profile.path, cluster_fasta):
            for hsp in result.hsps:
                # only hits scoring strictly above the cutoff are kept
                if not hsp.bitscore > profile.cutoff:
                    continue
                hits_by_id[hsp.hit_id].append(hsp)
    return hits_by_id