Пример #1
0
    def run_both_dirs(self, expected, seq):
        """ Checks the ORFs found in a sequence and in its reverse complement.

            Arguments:
                expected: the list of locations find_all_orfs should report for
                          a record built from seq
                seq: the sequence to build the test record from

            For the second check, the expected locations are taken in reverse
            order and each is mirrored with its _flip() over the sequence length.
        """
        def found_locations(rec):
            return [orf.location for orf in find_all_orfs(rec)]

        record = DummyRecord(seq=seq)
        assert expected == found_locations(record)

        # same record object, now carrying the opposite strand
        record.seq = record.seq.reverse_complement()
        mirrored = [loc._flip(len(seq)) for loc in expected[::-1]]
        assert mirrored == found_locations(record)
Пример #2
0
 def test_multi_start_single_stop(self):
     """ Two in-frame ATG codons ahead of a single stop codon must give one
         ORF spanning positions 0 to 72, on both strands.
     """
     def found_locations(sequence):
         record = DummyRecord(seq=sequence)
         return [orf.location for orf in find_all_orfs(record)]

     forward = "ATGNNNATG" + "N" * 60 + "TAG"
     location = FeatureLocation(ExactPosition(0), ExactPosition(72), strand=1)
     assert [location] == found_locations(forward)

     # the reverse complement should give the same span on the other strand
     backward = str(DummyRecord(seq=forward).seq.reverse_complement())
     location.strand = -1
     assert [location] == found_locations(backward)
Пример #3
0
def specific_analysis(record: secmet.Record) -> ThioResults:
    """ Runs thiopeptide prediction over the CDS features of each thiopeptide
        protocluster, together with the extra ORFs that all_orfs finds for it.

        Arguments:
            record: the Record instance to analyse

        Returns:
            a ThioResults instance holding the motifs found, the clusters they
            were found in and, per protocluster number, the extra ORFs that
            produced a motif
    """
    results = ThioResults(record.id)
    thio_clusters = (cluster for cluster in record.get_protoclusters()
                     if cluster.product == "thiopeptide")
    for cluster in thio_clusters:
        # ORFs that are not yet annotated are candidates as well
        extra_orfs = all_orfs.find_all_orfs(record, cluster)
        candidates = [*cluster.cds_children, *extra_orfs]

        # these depend only on the cluster, not on the individual candidate
        domains = get_detected_domains(cluster)
        thio_type = predict_type_from_cluster(domains)
        amidation = predict_amidation(domains)

        for candidate in candidates:
            prediction = run_thiopred(candidate, thio_type, domains)
            if prediction is None:
                continue

            if amidation:
                prediction.amidation = True
            motif = result_vec_to_feature(candidate, prediction)

            # only the extra ORFs need to be recorded as new CDS features
            if candidate in extra_orfs:
                cluster_number = cluster.get_protocluster_number()
                results.cds_features[cluster_number].append(candidate)
            results.motifs.append(motif)
            results.clusters_with_motifs.add(cluster)

    logging.debug("Thiopeptides marked %d motifs", len(results.motifs))
    return results
Пример #4
0
def specific_analysis(record: Record) -> LassoResults:
    """ Searches every lassopeptide cluster of the record for precursor motifs

        Arguments:
            record: the Record instance to analyse

        Returns:
            A LassoResults object holding the motifs by locus name, the locus
            names with motifs per cluster number, and any candidates without
            a region
    """
    results = LassoResults(record.id)
    motif_count = 0
    for cluster in record.get_clusters():
        if cluster.product != 'lassopeptide':
            continue

        # annotated CDS features first, followed by ORFs not yet annotated
        candidates = list(cluster.cds_children)
        candidates += all_orfs.find_all_orfs(record, cluster)

        for candidate in candidates:
            motif = run_lassopred(record, cluster, candidate)
            if motif is None:
                continue

            locus = candidate.get_name()
            results.motifs_by_locus[locus].append(motif)
            motif_count += 1
            results.clusters[cluster.get_cluster_number()].add(locus)
            # a candidate without a region is a new CDSFeature found with all_orfs
            if candidate.region is None:
                results.new_cds_features.add(candidate)

    logging.debug("Lassopeptide module marked %d motifs", motif_count)
    return results
Пример #5
0
def specific_analysis(record: secmet.Record,
                      options: ConfigType) -> SactiResults:
    """ Searches every sactipeptide protocluster of the record for precursors,
        considering both the cluster's CDS features and the ORFs found by
        all_orfs, then runs RRE_main on the cluster.

        Arguments:
            record: the Record to analyse
            options: the configuration handed on to RRE_main

        Returns:
            a SactiResults instance holding all found precursors and new ORFs
    """
    results = SactiResults(record.id)
    motif_count = 0
    new_feature_hits = 0
    sacti_clusters = (cluster for cluster in record.get_protoclusters()
                      if cluster.product == 'sactipeptide')
    # counter numbers only the sactipeptide clusters, starting at 1
    for counter, cluster in enumerate(sacti_clusters, 1):
        # ORFs not yet annotated: run the non-biosynthetic pHMMs over them
        # and pass the hits on to annotate_orfs
        extra_orfs = all_orfs.find_all_orfs(record, cluster)
        orf_fasta = fasta.get_fasta_from_features(extra_orfs)
        annotate_orfs(extra_orfs, run_non_biosynthetic_phmms(orf_fasta))

        # every CDS feature is evaluated as a possible precursor
        candidates = list(cluster.cds_children) + extra_orfs
        domains = get_detected_domains(cluster)

        for candidate in candidates:
            motif = run_sactipred(cluster, candidate, domains)
            if motif is None:
                continue

            locus = candidate.get_name()
            results.motifs_by_locus[locus].append(motif)
            motif_count += 1
            results.clusters[cluster.get_protocluster_number()].add(locus)
            # a candidate without a region is a new CDSFeature found with all_orfs
            if candidate.region is None:
                results.new_cds_features.add(candidate)
                new_feature_hits += 1

        # RREfinder analysis of the cluster
        name = '%s_%s_%s' % (record.id, cluster.product, counter)
        RRE_main(cluster, results, name, options)

    if motif_count:
        verb = "is" if new_feature_hits == 1 else "are"
        logging.debug(
            "Found %d sactipeptide motif(s) in %d feature(s), %d of which %s new",
            motif_count, len(results.motifs_by_locus), new_feature_hits, verb)
    else:
        logging.debug("Found no sactipeptide motifs")
    return results
Пример #6
0
def run_specific_analysis(record: Record,
                          options: ConfigType) -> LanthiResults:
    """ Runs the full lanthipeptide analysis over the given record

        Arguments:
            record: the Record instance to analyse
            options: the configuration handed on to RRE_main

        Returns:
            A populated LanthiResults object
    """
    # domains that mark a core biosynthetic enzyme; the same for every cluster
    core_domain_names = {
        'Lant_dehydr_N', 'Lant_dehydr_C', 'DUF4135', 'Pkinase'
    }
    results = LanthiResults(record.id)
    counter = 0
    for cluster in record.get_protoclusters():
        if cluster.product != 'lanthipeptide':
            continue

        # find core biosynthetic enzyme locations
        core_genes = []
        for gene in cluster.cds_children:
            if not gene.sec_met:
                continue
            domain_ids = gene.sec_met.domain_ids
            # We seem to hit Lant_dehydr_C on some O-Methyltranferases that also hit PCMT
            if 'PCMT' in domain_ids:
                continue
            if core_domain_names.intersection(domain_ids):
                core_genes.append(gene)

        precursor_candidates = find_lan_a_features(cluster)
        # Find candidate ORFs that are not yet annotated, keeping only those
        # with a translation shorter than 80
        extra_orfs = all_orfs.find_all_orfs(record, cluster)
        precursor_candidates.extend(orf for orf in extra_orfs
                                    if len(orf.translation) < 80)

        for gene in core_genes:
            neighbours = find_neighbours_in_range(gene, precursor_candidates)
            if not neighbours:
                continue
            run_lanthi_on_genes(record, gene, cluster, neighbours, results)

        # Analyze the cluster with RREfinder
        counter += 1
        name = '%s_%s_%s' % (record.id, cluster.product, counter)
        RRE_main(cluster, results, name, options)

    # motifs_by_locus maps a locus name to its list of motifs, so the motifs
    # are counted from the values; iterating the mapping itself would add up
    # the lengths of the locus names instead
    motif_count = sum(len(motifs) for motifs in results.motifs_by_locus.values())
    logging.debug("Lanthipeptide module marked %d motifs", motif_count)
    return results
Пример #7
0
def run_specific_analysis(record: Record) -> LanthiResults:
    """ Runs the full lanthipeptide analysis over the given record

        Arguments:
            record: the Record instance to analyse

        Returns:
            A populated LanthiResults object
    """
    # domains that mark a core biosynthetic enzyme; the same for every cluster
    core_domain_names = {
        'Lant_dehyd_N', 'Lant_dehyd_C', 'DUF4135', 'Pkinase'
    }
    results = LanthiResults(record.id)
    for cluster in record.get_clusters():
        if 'lanthipeptide' not in cluster.products:
            continue

        # find core biosynthetic enzyme locations
        core_genes = [
            gene for gene in cluster.cds_children
            if gene.sec_met
            and core_domain_names.intersection(gene.sec_met.domain_ids)
        ]

        precursor_candidates = find_lan_a_features(cluster)
        # Find candidate ORFs that are not yet annotated, keeping only those
        # with a translation shorter than 80
        extra_orfs = all_orfs.find_all_orfs(record, cluster)
        precursor_candidates.extend(orf for orf in extra_orfs
                                    if len(orf.translation) < 80)

        for gene in core_genes:
            neighbours = find_neighbours_in_range(gene, precursor_candidates)
            run_lanthi_on_genes(record, gene, neighbours, results)

    # motifs_by_locus maps a locus name to its list of motifs, so the motifs
    # are counted from the values; iterating the mapping itself would add up
    # the lengths of the locus names instead
    motif_count = sum(len(motifs) for motifs in results.motifs_by_locus.values())
    logging.debug("Lanthipeptide module marked %d motifs", motif_count)
    return results