Exemplo n.º 1
0
def get_detected_domains(cluster: secmet.Protocluster) -> Dict[str, int]:
    """ Counts every domain detected within a cluster, combining the domain
        ids already annotated on the cluster's CDS features with the hits
        reported by the extra, sactipeptide-specific, HMM profiles.

        Arguments:
            cluster: the Protocluster to gather domains from

        Returns:
            a dictionary mapping each domain id to the number of times it was found
    """
    counts = {}  # type: Dict[str, int]

    # domains already annotated on the CDS features, features without
    # secondary metabolite annotations contribute nothing
    for cds in cluster.cds_children:
        if cds.sec_met:
            for identifier in cds.sec_met.domain_ids:
                counts[identifier] = counts.get(identifier, 0) + 1

    # hits from the additional profiles, each reported hit is counted once
    cluster_fasta = fasta.get_fasta_from_features(cluster.cds_children)
    for hsps in run_non_biosynthetic_phmms(cluster_fasta).values():
        for hsp in hsps:
            counts[hsp.query_id] = counts.get(hsp.query_id, 0) + 1

    return counts
Exemplo n.º 2
0
    def get_alignments(self) -> List[Alignment]:
        """ Runs hmmpfam2 with this instance's database over the domains of
            interest and builds an Alignment from the first HSP of every
            result that has at least one HSP.

            Returns:
                a list of Alignment instances, empty if there are no
                domains of interest
        """
        if not self.domains_of_interest:
            return []

        # the fasta uses simple numeric indices instead of the (possibly long)
        # domain names, for safety of the tools
        data = fasta.get_fasta_from_features(self.domains_of_interest, numeric_names=True)
        assert data, "empty fasta created"

        results = subprocessing.run_hmmpfam2(
            self.database,
            data,
            extra_args=[
                "-T", "0",  # min score
                "-E", "0.1",  # max evalue
            ],
        )

        alignments = []
        for result in results:
            if not result.hsps:
                continue
            # only the first HSP of each result is used
            hsp = result.hsps[0]
            assert result.id == hsp.aln[0].id
            # the result id is the numeric index that was written to the fasta,
            # so it maps straight back to the real domain
            domain = self.domains_of_interest[int(result.id)]
            alignment = Alignment(domain, hsp.aln[0].seq, hsp.aln[1].seq,
                                  hsp.hit_start, hsp.hit_end)
            alignments.append(alignment)
        return alignments
Exemplo n.º 3
0
def get_detected_domains(cluster: secmet.Protocluster) -> Set[str]:
    """ Collects the ids of all domains detected within a cluster, combining
        the domain ids already annotated on the cluster's CDS features with
        the hits of the extra, thiopeptide-specific, HMM profiles.

        Arguments:
            cluster: the Protocluster to gather domains from

        Returns:
            a set of domain ids
    """
    domain_ids = set()  # type: Set[str]

    # domains already annotated on the CDS features
    for cds in cluster.cds_children:
        if cds.sec_met:
            domain_ids.update(cds.sec_met.domain_ids)

    # hits from the additional profiles, duplicates collapse within the set
    cluster_fasta = fasta.get_fasta_from_features(cluster.cds_children)
    hits_by_id = run_non_biosynthetic_phmms(cluster_fasta)
    for hsps in hits_by_id.values():
        domain_ids.update(hsp.query_id for hsp in hsps)

    return domain_ids
Exemplo n.º 4
0
def generate_domains(record: Record) -> NRPSPKSDomains:
    """ Finds NRPS/PKS domains and motifs for all CDS features within the
        clusters of a record, then has each per-CDS result annotate the
        record and CDS via its `annotate_domains` method.

        Arguments:
            record: the secmet.Record of which to annotate CDS features

        Returns:
            a NRPSPKSDomains instance holding a CDSResult for every CDS that
            had at least one domain or motif, and marked as added
    """
    results = NRPSPKSDomains(record.id)

    cds_features = record.get_cds_features_within_clusters()
    assert cds_features  # because every cluster should have genes

    # a single fasta is shared by both searches
    fasta = get_fasta_from_features(cds_features)
    domains_by_cds = find_domains(fasta, record)
    motifs_by_cds = find_ab_motifs(fasta)

    for cds in cds_features:
        name = cds.get_name()
        domains = domains_by_cds.get(name, [])
        motifs = motifs_by_cds.get(name, [])
        # only CDS features with something found get a result
        if domains or motifs:
            hit_ids = [domain.hit_id for domain in domains]
            results.cds_results[cds] = CDSResult(domains, motifs, classify_cds(hit_ids))

    # annotation happens only after all results have been gathered
    for cds, cds_result in results.cds_results.items():
        cds_result.annotate_domains(record, cds)
    results.added = True
    return results
Exemplo n.º 5
0
def run_hmmer(record: Record, features: Iterable[CDSFeature], max_evalue: float,
              min_score: float, database: str, tool: str, filter_overlapping: bool = True
              ) -> HmmerResults:
    """ Build hmmer results for the given features

        Arguments:
            record: the Record instance to run hmmer over
            features: the CDSFeatures to run over specifically
            max_evalue: the maximum evalue for hits, passed on to build_hits
            min_score: the minimum score for hits, passed on to build_hits
            database: the path of the database to search for hits within
            tool: the name of the specific tool calling into this module
            filter_overlapping: if True, the hits of each locus tag are passed
                                through remove_overlapping before being returned

        Returns:
            a HmmerResults instance built from the remaining hits

        Raises:
            ValueError: if the database path does not exist
    """
    if not os.path.exists(database):
        raise ValueError("Given database does not exist: %s" % database)

    sequences = fasta.get_fasta_from_features(features)
    raw_results = subprocessing.run_hmmscan(database, sequences, opts=["--cut_tc"])
    hits = build_hits(record, raw_results, min_score, max_evalue, database)

    if filter_overlapping:
        # overlaps are only resolved between hits sharing a locus tag
        hits_by_locus = defaultdict(list)
        for hit in hits:
            hits_by_locus[hit.locus_tag].append(hit)
        cutoffs = pfamdb.get_pfam_cutoffs(database)
        filtered = []
        for locus_hits in hits_by_locus.values():
            filtered += remove_overlapping(locus_hits, cutoffs)
        hits = filtered

    return HmmerResults(record.id, max_evalue, min_score, database, tool, hits)
Exemplo n.º 6
0
def scan_for_functions(cds_features: List[CDSFeature], database: str,
                       hmmscan_opts: Optional[List[str]] = None) -> Dict[str, HMMResult]:
    """ Finds the best classification for each of the provided genes.

        Arguments:
            cds_features: a list of CDSFeatures to classify
            database: the path to the database to check
            hmmscan_opts: a list of extra options to provide to hmmscan

        Returns:
            a dictionary mapping CDS name to a single HMMResult, the first of
            the refined hits for that CDS, CDS features without any hits
            are omitted
    """
    sequences = fasta.get_fasta_from_features(cds_features)
    raw_results = subprocessing.run_hmmscan(database, sequences, hmmscan_opts)
    refined = refine_hmmscan_results(raw_results, utils.get_hmm_lengths(database))

    best_hits = {}  # type: Dict[str, HMMResult]
    for name in (cds.get_name() for cds in cds_features):
        candidates = refined.get(name)
        # keep only the first refined hit, skipping names with no hits
        if candidates:
            best_hits[name] = candidates[0]
    return best_hits
Exemplo n.º 7
0
def get_detected_domains(genes: List[CDSFeature]) -> List[str]:
    """ Gathers all detected domains for a set of genes, including some not
        detected by hmm_detection.

        Arguments:
            genes: a list of genes to check

        Returns:
            a list with the domain ids annotated on the genes (duplicates
            kept), followed by each distinct item reported by
            run_non_biosynthetic_phmms in order of first appearance
    """
    detected = []  # type: List[str]
    # domains already annotated on the genes
    for gene in genes:
        if gene.sec_met:
            detected.extend(gene.sec_met.domain_ids)

    # extra profiles that hmm_detection doesn't cover
    cluster_fasta = get_fasta_from_features(genes)
    assert cluster_fasta
    # NOTE(review): the items are stored as-is, which matches the List[str]
    # annotations only if run_non_biosynthetic_phmms yields names rather than
    # HSP objects - confirm against that function
    extras = []  # type: List[str]
    for hits in run_non_biosynthetic_phmms(cluster_fasta).values():
        for hit in hits:
            if hit in extras:
                continue
            extras.append(hit)
    detected.extend(extras)

    return detected
Exemplo n.º 8
0
def specific_analysis(record: secmet.Record,
                      options: ConfigType) -> SactiResults:
    """ Analyse each sactipeptide cluster and find precursors within it.
        Both the existing CDS features of a cluster and the ORFs found by
        all_orfs are considered as candidates. Each sactipeptide cluster is
        also handed to RRE_main afterwards.

        Arguments:
            record: the Record to analyse
            options: the configuration, passed through to RRE_main

        Returns:
            a SactiResults instance holding all found precursors and new ORFs
    """
    results = SactiResults(record.id)
    total_motifs = 0
    total_new_features = 0

    # protoclusters of other products are ignored entirely, so the numbering
    # used for the RRE names only counts sactipeptide clusters
    sacti_clusters = (candidate_cluster for candidate_cluster in record.get_protoclusters()
                      if candidate_cluster.product == 'sactipeptide')
    for number, cluster in enumerate(sacti_clusters, 1):
        # Find candidate ORFs that are not yet annotated
        extra_orfs = all_orfs.find_all_orfs(record, cluster)
        orf_fasta = fasta.get_fasta_from_features(extra_orfs)
        annotate_orfs(extra_orfs, run_non_biosynthetic_phmms(orf_fasta))

        # Every CDS, existing or new, is evaluated for RiPP-likeness
        candidates = list(cluster.cds_children) + extra_orfs
        domains = get_detected_domains(cluster)

        for candidate in candidates:
            motif = run_sactipred(cluster, candidate, domains)
            if motif is not None:
                locus = candidate.get_name()
                results.motifs_by_locus[locus].append(motif)
                total_motifs += 1
                results.clusters[cluster.get_protocluster_number()].add(locus)
                # a candidate without a region came from all_orfs, so it
                # has to be tracked as a new CDSFeature
                if candidate.region is None:
                    results.new_cds_features.add(candidate)
                    total_new_features += 1

        # Analyze the cluster with RREfinder
        rre_name = '%s_%s_%s' % (record.id, cluster.product, number)
        RRE_main(cluster, results, rre_name, options)

    if total_motifs:
        verb = "are" if total_new_features != 1 else "is"
        logging.debug(
            "Found %d sactipeptide motif(s) in %d feature(s), %d of which %s new",
            total_motifs, len(results.motifs_by_locus), total_new_features, verb)
    else:
        logging.debug("Found no sactipeptide motifs")
    return results
Exemplo n.º 9
0
def run_hmmer(record: Record, features: List[CDSFeature], max_evalue: float,
              min_score: float, database: str, tool: str) -> HmmerResults:
    """ Build hmmer results for the given features

        Arguments:
            record: the Record instance to run hmmer over
            features: the list of CDSFeatures to run over specifically
            max_evalue: the maximum evalue for hits, passed on to build_hits
            min_score: the minimum score for hits, passed on to build_hits
            database: the path of the database to search for hits within
            tool: the name of the specific tool calling into this module

        Returns:
            a HmmerResults instance built from the hits

        Raises:
            ValueError: if the database path does not exist
    """
    if not os.path.exists(database):
        raise ValueError("Given database does not exist: %s" % database)

    sequences = fasta.get_fasta_from_features(features)
    raw_results = subprocessing.run_hmmscan(database, sequences)
    hits = build_hits(record, raw_results, min_score, max_evalue, database)

    return HmmerResults(record.id, max_evalue, min_score, database, tool, hits)
Exemplo n.º 10
0
def run_t2pks_hmmscan(cluster: Cluster) -> Dict[str, List[HMMResult]]:
    """ Runs hmmscan with the type II PKS profiles (data/t2pks.hmm, relative
        to this file) on the coding sequences of a cluster, using the
        trusted cutoffs of the profiles.

        Arguments:
            cluster: Cluster on which the type II PKS hmmscan shall be run

        Returns:
            the refined hmmscan results as built by refine_hmmscan_results:
            a dictionary of lists of HMMResults, keyed by string
            (presumably the CDS name)
    """
    sequences = fasta.get_fasta_from_features(cluster.cds_children)
    profiles = path.get_full_path(__file__, "data", "t2pks.hmm")
    raw_hits = subprocessing.run_hmmscan(profiles, sequences, opts=['--cut_tc'])
    # profile lengths are needed by the refinement step
    return refine_hmmscan_results(raw_hits, get_hmm_lengths(profiles))
Exemplo n.º 11
0
def run_t2pks_hmmscan(
        cds_features: Iterable[CDSFeature]) -> Dict[str, List[HMMResult]]:
    """ Runs hmmscan with the type II PKS profiles (data/t2pks.hmm, relative
        to this file) on the given CDSFeatures, using the trusted cutoffs of
        the profiles.

        Arguments:
            cds_features: the CDSFeatures on which to run the type II PKS hmmscan

        Returns:
            the refined hmmscan results as built by refine_hmmscan_results:
            a dictionary of lists of HMMResults, keyed by string
            (presumably the CDS name)
    """
    sequences = fasta.get_fasta_from_features(cds_features)
    profiles = path.get_full_path(__file__, "data", "t2pks.hmm")
    raw_hits = subprocessing.run_hmmscan(profiles, sequences, opts=['--cut_tc'])
    # profile lengths are needed by the refinement step
    return refine_hmmscan_results(raw_hits, get_hmm_lengths(profiles))
Exemplo n.º 12
0
def generate_domains(record: Record) -> NRPSPKSDomains:
    """ Finds NRPS/PKS domains, KS subtypes, motifs and modules for all CDS
        features within the regions of a record, then has each per-CDS
        result annotate the record and CDS via its `annotate_domains` method.

        Arguments:
            record: the secmet.Record of which to annotate CDS features

        Returns:
            a NRPSPKSDomains instance holding a CDSResult for every CDS that
            had at least one domain or motif
    """
    results = NRPSPKSDomains(record.id)

    cds_features = record.get_cds_features_within_regions()
    assert cds_features  # because every cluster should have genes

    # a single fasta is shared by all three searches
    fasta = get_fasta_from_features(cds_features)
    domains_by_cds = find_domains(fasta, record)
    ks_subtypes_by_cds = find_ks_domains(fasta)
    motifs_by_cds = find_ab_motifs(fasta)

    previous: Optional[CDSModuleInfo] = None
    for cds in cds_features:
        name = cds.get_name()
        domains = domains_by_cds.get(name, [])
        motifs = motifs_by_cds.get(name, [])
        # a CDS with nothing found is skipped without resetting the
        # previous module info
        if not domains and not motifs:
            continue

        subtype_names = match_subtypes_to_ks_domains(domains, ks_subtypes_by_cds.get(name, []))
        hit_ids = [domain.hit_id for domain in domains]
        domain_type = classify_cds(hit_ids, subtype_names)
        modules = build_modules_for_cds(domains, subtype_names, name)
        results.cds_results[cds] = CDSResult(domains, motifs, domain_type,
                                             modules, subtype_names)

        # combine modules that cross CDS boundaries, if possible and relevant
        current = CDSModuleInfo(cds, modules)
        if previous and previous.modules and current.modules:
            # modifies the lists of modules linked in each CDSResult
            combine_modules(current, previous)
        previous = current

    # annotation happens only after all results have been gathered
    for cds, cds_result in results.cds_results.items():
        cds_result.annotate_domains(record, cds)
    return results
Exemplo n.º 13
0
def classify_genes(
        cds_features: List[CDSFeature]) -> Dict[str, List[HMMResult]]:
    """ Finds possible classifications for the provided genes by running
        hmmscan with the smCOG profiles (data/smcogs.hmm, relative to this
        file) and an evalue limit of 1E-6.

        Arguments:
            cds_features: a list of CDSFeatures to classify

        Returns:
            a dictionary mapping CDS name to a list of HMMResult instances of
                classifications
    """
    sequences = fasta.get_fasta_from_features(cds_features)
    profiles = path.get_full_path(__file__, "data", "smcogs.hmm")
    raw_hits = subprocessing.run_hmmscan(profiles, sequences, ["-E", "1E-6"])
    # profile lengths are needed by the refinement step
    return refine_hmmscan_results(raw_hits, utils.get_hmm_lengths(profiles))
Exemplo n.º 14
0
def run_hmmer(record: Record, features: Iterable[CDSFeature], max_evalue: float,
              min_score: float, database: str, tool: str) -> HmmerResults:
    """ Build hmmer results for the given features, running hmmscan with the
        trusted cutoffs of the profiles

        Arguments:
            record: the Record instance to run hmmer over
            features: the CDSFeatures to run over specifically
            max_evalue: the maximum evalue for hits, passed on to build_hits
            min_score: the minimum score for hits, passed on to build_hits
            database: the path of the database to search for hits within
            tool: the name of the specific tool calling into this module

        Returns:
            a HmmerResults instance built from the hits

        Raises:
            ValueError: if the database path does not exist
    """
    if not os.path.exists(database):
        raise ValueError("Given database does not exist: %s" % database)

    sequences = fasta.get_fasta_from_features(features)
    raw_results = subprocessing.run_hmmscan(database, sequences, opts=["--cut_tc"])
    hits = build_hits(record, raw_results, min_score, max_evalue, database)

    return HmmerResults(record.id, max_evalue, min_score, database, tool, hits)
Exemplo n.º 15
0
def run_starter_unit_blastp(
        cluster: Cluster,
        cds_hmm_hits: Dict[str,
                           List[HMMResult]]) -> Dict[str, List[HMMResult]]:
    """ Runs blastp on starter unit coding sequences in given cluster

        Arguments:
            cluster: Cluster on which the blastp shall be run
            cds_hmm_hits: HMMResults by cds name from type II PKS hmmscan

        Returns:
            an empty dictionary if no starter unit cds are present, otherwise
            the refined blastp results as built by refine_hmmscan_results:
            a dictionary of lists of HMMResults
    """
    starter_unit_ids = ('KSIII', 'AT', 'AMID', 'LIG')

    # find the CDS features with starter unit hits, along with those hit ids
    hit_ids_by_cds = {}
    for cds_name, hmm_hits in cds_hmm_hits.items():
        matching = [hit.hit_id for hit in hmm_hits if hit.hit_id in starter_unit_ids]
        if matching:
            cds = cluster.parent_record.get_cds_by_name(cds_name)
            hit_ids_by_cds[cds] = matching

    if not hit_ids_by_cds:
        return {}

    # each hit id has a blast database of the same name in the data
    # directory, with the matching sequences in '<hit id>.fasta'
    blastp_results = []
    reference_fastas = set()
    for cds, hit_ids in hit_ids_by_cds.items():
        query_sequence = fasta.get_fasta_from_features([cds])
        for hit_id in hit_ids:
            database = path.get_full_path(__file__, 'data', hit_id)
            blastp_results.extend(subprocessing.run_blastp(database, query_sequence))
            reference_fastas.add(path.get_full_path(__file__, 'data', hit_id + '.fasta'))

    # reference sequence lengths are needed by the refinement step
    lengths = {}
    for fasta_file in reference_fastas:
        lengths.update(get_fasta_lengths(fasta_file))

    return refine_hmmscan_results(blastp_results, lengths)
Exemplo n.º 16
0
def find_diamond_matches(
        record: Record,
        database: str) -> Tuple[HitsByCDS, HitsByReferenceName]:
    """ Runs diamond in blastp mode, comparing all CDS features within the
        regions of the record to the given database

        Arguments:
            record: the record to use as a query
            database: the path of the database to compare to

        Returns:
            the result of blast_parse on the diamond output, a tuple of
                a dictionary mapping CDSFeature to
                     a dictionary mapping reference CDS numeric ID to
                        a list of Hits for that reference
                a dictionary mapping reference region name to
                    a dictionary mapping reference CDS numeric ID to
                        a list of Hits for that reference
    """
    logging.info("Comparing regions to reference database")
    diamond_opts = [
        "--compress", "0",
        "--max-target-seqs", "10000",
        "--evalue", "1e-05",
        "--outfmt", "6",  # 6 is blast tabular format, just as in blastp
    ]
    features = record.get_cds_features_within_regions()

    # the query has to exist as a file for diamond, so the temporary file
    # must be flushed and kept open until the search has finished
    with NamedTemporaryFile() as query_file:
        query_fasta = fasta.get_fasta_from_features(features, numeric_names=True)
        query_file.write(query_fasta.encode())
        query_file.flush()
        raw = subprocessing.run_diamond_search(query_file.name, database,
                                               mode="blastp", opts=diamond_opts)

    # the fasta used numeric names, so features are supplied keyed by index
    features_by_index = dict(enumerate(features))
    return blast_parse(raw, features_by_index)
Exemplo n.º 17
0
def annotate_domains(record: Record) -> None:
    """ Annotates NRPS/PKS domains on CDS features. The `nrps_pks` member of
        each feature with domains will be updated, along with creating
        CDSMotif features when relevant. Motifs are only handled for
        CDS features that also have domains.

        Arguments:
            record: the secmet.Record of which to annotate CDS features

        Returns:
            None
    """
    cds_features = record.get_cds_features_within_clusters()
    assert cds_features  # because every cluster should have genes

    # a single fasta is shared by both searches
    fasta = get_fasta_from_features(cds_features)
    domains_by_cds = find_domains(fasta, record)
    motifs_by_cds = find_ab_motifs(fasta)

    for cds in cds_features:
        name = cds.get_name()

        # without domains there's nothing to annotate, motifs included
        domains = domains_by_cds.get(name)
        if not domains:
            continue
        cds.nrps_pks.type = classify_feature([domain.hit_id for domain in domains])
        for domain in domains:
            cds.nrps_pks.add_domain(domain)

        # motif features are added to both the record and the CDS itself
        motifs = motifs_by_cds.get(name)
        if motifs:
            motif_features = generate_motif_features(record, cds, motifs)
            for motif_feature in motif_features:
                record.add_cds_motif(motif_feature)
            cds.motifs.extend(motif_features)
Exemplo n.º 18
0
def run_starter_unit_blastp(
    cds_hmm_hits: Dict[CDSFeature,
                       List[HMMResult]]) -> Dict[str, List[HMMResult]]:
    """ Runs blastp on the coding sequences that have starter unit hits,
        ensuring every resulting hit id ends with "-CoA"

        Arguments:
            cds_hmm_hits: HMMResults by cds from type II PKS hmmscan

        Returns:
            a dictionary mapping CDS name to a list of HMMresults, empty if
            blastp found nothing
    """
    starter_unit_ids = ('KSIII', 'AT', 'AMID', 'LIG')

    # each starter unit hit id has a blast database of the same name in the
    # data directory, with the matching sequences in '<hit id>.fasta'
    blastp_results = []
    reference_fastas = set()
    for cds, hmm_hits in cds_hmm_hits.items():
        query_sequence = fasta.get_fasta_from_features([cds])
        for hit_id in (hit.hit_id for hit in hmm_hits):
            if hit_id in starter_unit_ids:
                database = path.get_full_path(__file__, 'data', hit_id)
                blastp_results.extend(subprocessing.run_blastp(database, query_sequence))
                reference_fastas.add(path.get_full_path(__file__, 'data', hit_id + '.fasta'))

    if not blastp_results:
        return {}

    # reference sequence lengths are needed by the refinement step
    lengths = {}
    for fasta_file in reference_fastas:
        lengths.update(get_fasta_lengths(fasta_file))
    results = refine_hmmscan_results(blastp_results, lengths)

    # hits lacking the suffix are replaced in place with a renamed copy
    suffix = "-CoA"
    for hits in results.values():
        for index, hit in enumerate(hits):
            if hit.hit_id.endswith(suffix):
                continue
            hits[index] = HMMResult(hit.hit_id + suffix, hit.query_start,
                                    hit.query_end, hit.evalue, hit.bitscore)
    return results
Exemplo n.º 19
0
def acquire_rodeo_heuristics(
        cluster: secmet.Protocluster, query: secmet.CDSFeature, leader: str,
        core: str, domains: Dict[str,
                                 int]) -> Tuple[int, List[float], List[int]]:
    """ Calculate heuristic scores for RODEO

        A running integer score is adjusted by a fixed sequence of checks and
        the outcome of every check is appended, in order, to a feature list.
        The position of each value in that list is therefore significant.

        Arguments:
            cluster: the Protocluster containing the query, used for its
                     parent record and its CDS children
            query: the CDSFeature being evaluated as a precursor
            leader: the leading part of the precursor sequence
            core: the remaining part of the precursor sequence, the precursor
                  itself is the leader followed by the core
            domains: the domains detected in the cluster, only the presence
                     of keys is checked, never the values

        Returns:
            a tuple of
                the heuristic score
                the list of feature values (masses, a distance, and 0/1 flags)
                the list of hit end positions for all PF04055 HSPs with a
                    bitscore over 40
    """
    # every check below appends to tabs, whether or not it changes the score
    tabs = []
    score = 0
    precursor = leader + core
    # Calcd. precursor peptide mass (Da)
    precursor_analysis = utils.RobustProteinAnalysis(precursor,
                                                     monoisotopic=True,
                                                     ignore_invalid=False)
    tabs.append(float(precursor_analysis.molecular_weight()))
    # Calcd. leader peptide mass (Da)
    leader_analysis = utils.RobustProteinAnalysis(leader,
                                                  monoisotopic=True,
                                                  ignore_invalid=False)
    tabs.append(float(leader_analysis.molecular_weight()))
    # Calcd. core peptide mass (Da)
    core_analysis = utils.RobustProteinAnalysis(core,
                                                monoisotopic=True,
                                                ignore_invalid=False)
    tabs.append(float(core_analysis.molecular_weight()))
    # Distance to any biosynthetic protein (E, B, C)
    # (only the PF04055 profile is used for this)
    hmmer_profiles = ['PF04055']
    distance = utils.distance_to_pfam(cluster.parent_record, query,
                                      hmmer_profiles)
    tabs.append(distance)
    # NOTE(review): the checks below repeat the distance_to_pfam call above with
    # identical arguments instead of reusing `distance` - presumably the result
    # is the same each time, confirm before simplifying
    # rSAM within 500 nt?
    if utils.distance_to_pfam(cluster.parent_record, query, ['PF04055']) < 500:
        score += 1
        tabs.append(1)
    else:
        tabs.append(0)
    # rSAM within 150 nt?
    if utils.distance_to_pfam(cluster.parent_record, query, ['PF04055']) < 150:
        score += 1
        tabs.append(1)
    else:
        tabs.append(0)
    # rSAM further than 1000 nt?
    # NOTE(review): the threshold used in the code is 10000, not the 1000 of
    # the comment above - confirm which is intended
    # presumably a distance of -1 means no such domain was found - verify
    # against utils.distance_to_pfam
    if utils.distance_to_pfam(cluster.parent_record, query, ['PF04055']) == -1 or \
       utils.distance_to_pfam(cluster.parent_record, query, ['PF04055']) > 10000:
        score -= 2
        tabs.append(1)
    else:
        tabs.append(0)
    # Ratio of N-term to 1st Cys 0.25<x<0.60; Ratio of N-term to 1st Cys <0.25 or >0.60
    # (two fields are added, the second the binary opposite of the first,
    # a precursor without any Cys is treated as being out of range)
    if "C" not in precursor:
        score -= 2
        tabs += [0, 1]
    elif 0.25 <= precursor.find("C") / len(precursor) <= 0.60:
        score += 2
        tabs += [1, 0]
    else:
        score -= 2
        tabs += [0, 1]
    # Three or more Cys; Less than 3 Cys
    if precursor.count("C") >= 3:
        score += 4
        tabs += [1, 0]
    else:
        score -= 4
        tabs += [0, 1]
    # CxC/CxxC/CxxxC/CxxxxxC; # CC/CCC
    # each entry pairs a pattern with its score change, one field is added per
    # pattern, in the order listed here, and only the core is searched
    motifs = (('C.{5}C', 2), ('C.{3}C', 1), ('C.{2}C', 1), ('C.{1}C', 1),
              ('CC', -2), ('CCC', -2))
    for motif in motifs:
        if re.search(motif[0], core):
            score += motif[1]
            tabs.append(1)
        else:
            tabs.append(0)
    # No Cys in last 1/4th?
    # a negative index for slicing from the end, floor division of the negated
    # length rounds the size of the slice up
    quarter_length = -len(precursor) // 4
    if "C" not in precursor[quarter_length:]:
        score += 1
        tabs.append(1)
    else:
        score -= 1
        tabs.append(0)
    # 2 Cys in first 2/3rds of precursor, 1 Cys in last 1/3rd of precursor
    two_thirds = 2 * len(precursor) // 3
    if precursor[:two_thirds].count("C") == 2 and precursor[two_thirds:].count(
            "C") == 1:
        score += 1
        tabs.append(1)
    else:
        tabs.append(0)
    # Peptide matches SboA hmm
    if cds_has_domains(query, {"Subtilosin_A"}):
        score += 3
        tabs.append(1)
    else:
        tabs.append(0)
    # Peptide matches SkfA hmm
    if cds_has_domains(query, {"TIGR04404"}):
        score += 3
        tabs.append(1)
    else:
        tabs.append(0)
    # Peptide matches SCIFF hmm
    if cds_has_domains(query, {"TIGR03973"}):
        score += 2
        tabs.append(1)
    else:
        tabs.append(0)
    # cluster has PqqD/RRE (PF05402)
    if "PF05402" in domains:
        score += 1
        tabs.append(1)
    else:
        tabs.append(0)
    # cluster has SPASM domain (PF13186)
    if "PF13186" in domains:
        score += 1
        tabs.append(1)
    else:
        tabs.append(0)
    # PF04055 (rSAM) domain start > 80
    # the profile is searched against every CDS of the cluster, not only the query
    runresults = subprocessing.run_hmmsearch(
        path.get_full_path(__file__, "data", "PF04055.hmm"),
        fasta.get_fasta_from_features(cluster.cds_children))
    max_start = 0
    # hitstarts is only used to check whether any HSP passed the cut-off,
    # hitends is returned to the caller
    hitstarts = []
    hitends = []
    for runresult in runresults:
        # Store result if it is above cut-off
        for hsp in runresult.hsps:
            if hsp.bitscore > 40:
                hitstarts.append(hsp.hit_start)
                max_start = max(hsp.hit_start, max_start)
                hitends.append(hsp.hit_end)
    if hitstarts and max_start > 80:
        score += 1
        tabs.append(1)
    else:
        tabs.append(0)
    # cluster has peptidase
    # (one field per peptidase domain, each one present adds to the score)
    peptidase_domains = [
        "Peptidase_M16_C", "Peptidase_S8", "Peptidase_M16", "Peptidase_S41"
    ]
    no_peptidase = True
    for pepdom in peptidase_domains:
        if pepdom in domains:
            score += 1
            tabs.append(1)
            no_peptidase = False
        else:
            tabs.append(0)
    # cluster has transporter
    # (one field per transporter domain, each one present adds to the score)
    transport_domains = ["PF00005", "PF00664"]
    for transpdom in transport_domains:
        if transpdom in domains:
            score += 1
            tabs.append(1)
        else:
            tabs.append(0)
    # cluster has response regulator (PF00072)
    if "PF00072" in domains:
        score += 1
        tabs.append(1)
    else:
        tabs.append(0)
    # cluster has major facilitator (PF07690)
    if "PF07690" in domains:
        score += 1
        tabs.append(1)
    else:
        tabs.append(0)
    # cluster has ATPase (PF13304)
    if "PF13304" in domains:
        score += 1
        tabs.append(1)
    else:
        tabs.append(0)
    # cluster has Fer4_12 (PF13353)
    if "PF13353" in domains:
        score += 1
        tabs.append(1)
    else:
        tabs.append(0)
    # cluster has rSAM (PF04055)
    # (TIGR03975 is accepted as an alternative)
    if "PF04055" in domains or "TIGR03975" in domains:
        score += 2
        tabs.append(1)
    else:
        tabs.append(0)
    # cluster has no recognized peptidase
    # (uses the flag set while checking the peptidase domains earlier)
    if no_peptidase:
        score -= 2
        tabs.append(1)
    else:
        tabs.append(0)
    # C-terminal portion is < 0.35 or > 0.65; C-terminal portion is defined as
    # the part from the last cysteine in the last identified Cx(n)C motif to the C-terminus
    # the binary opposite is also included as the next field
    # NOTE(review): index starts at -1 and only ever decreases, so last_motif_c
    # (index + 1) is never positive and the ratio tested below can never reach
    # 0.35, meaning the else branch is always taken. The loop also keeps
    # overwriting last_motif_c while walking towards the N-terminus, so it ends
    # on the match closest to the start rather than the end. This looks
    # unintended, but changing it would change the scores and features
    # - confirm against the reference RODEO implementation before altering
    last_motif_c = 0
    index = -1
    for aa in reversed(precursor):
        if aa == "C" and "C" in precursor[index - 6:index]:
            last_motif_c = index + 1
        index -= 1
    if 0.35 <= last_motif_c / len(precursor) <= 0.65:
        score += 3
        tabs += [0, 1]
    else:
        score -= 2
        tabs += [1, 0]
    # SS profile count > 1
    # is there more than one Cx..C structure in the sequence
    # the pattern is wrapped in a lookahead so that overlapping matches are
    # all counted, CHAIN_LOWER and CHAIN_UPPER are defined outside this function
    cysrex = '(?=(C.{%d,%d}C))' % (CHAIN_LOWER, CHAIN_UPPER)
    rex4 = re.compile(cysrex)
    if len(rex4.findall(core)) > 1:
        score += 2
        tabs.append(1)
    else:
        tabs.append(0)
    return score, tabs, hitends