示例#1
0
def find_all_orfs(record: Record,
                  cluster: Optional[Cluster] = None) -> List[CDSFeature]:
    """ Build CDS features for every ORF that does not clash with a known CDS.

        Candidate locations come from scan_orfs, run over both strands of the
        searched sequence. The stated contract is a minimum ORF length of
        60 bases; that limit is not enforced here, so it presumably lives in
        scan_orfs.

        When a cluster is supplied the search is restricted to its range:
        candidates with fuzzy (before/after) boundaries are dropped, overlap
        is tested against the cluster's own CDS children, and the resulting
        feature must be contained by the cluster.

        Arguments:
            record: the record to search
            cluster: the specific Cluster to search within, or None to search
                     the whole record

        Returns:
            a list of CDSFeatures, one for each accepted ORF
    """
    # Default to the whole record; narrow down only if a cluster was given
    sequence = record.seq
    known_cds = record.get_cds_features()
    shift = 0
    restricted = bool(cluster)
    if restricted:
        sequence = sequence[cluster.location.start:cluster.location.end]
        shift = cluster.location.start
        known_cds = tuple(cluster.cds_children)

    # Forward strand candidates first, then those from the reverse complement
    candidates = (scan_orfs(sequence, 1, shift)
                  + scan_orfs(sequence.reverse_complement(), -1, shift))

    accepted = []
    for candidate in candidates:
        # within a cluster, ORFs with an open-ended boundary are ignored
        if restricted and (
                isinstance(candidate.start, (BeforePosition, AfterPosition))
                or isinstance(candidate.end, (BeforePosition, AfterPosition))):
            continue

        # a throwaway feature is only needed to run the overlap test
        probe = Feature(candidate, feature_type="dummy")
        if any(probe.overlaps_with(cds) for cds in known_cds):
            continue

        # numbering follows the count of features kept so far, starting at 1
        orf = create_feature_from_location(record, candidate, len(accepted) + 1)

        # keep it unless a cluster was given and does not wholly contain it
        if not restricted or orf.is_contained_by(cluster):
            accepted.append(orf)

    return accepted
示例#2
0
def find_all_orfs(record: Record, area: Optional[CDSCollection] = None) -> List[CDSFeature]:
    """ Build CDS features for every ORF that does not clash with a known CDS.

        Candidate locations come from scan_orfs, run over both strands of the
        searched sequence. The stated contract is a minimum ORF length of
        60 bases; that limit is not enforced here, so it presumably lives in
        scan_orfs.

        When an area is supplied the search is restricted to it: the sequence
        is extracted from the area, candidates with fuzzy (before/after)
        boundaries are dropped, overlap is tested against the record's CDS
        features within (or overlapping) the area's location, and the
        resulting feature must be contained by the area.

        Arguments:
            record: the record to search
            area: the specific CDSCollection to search within, or None to
                  search the whole record

        Returns:
            a list of CDSFeatures, one for each accepted ORF
    """
    # Default to the whole record; narrow down only if an area was given
    sequence = record.seq
    known_cds: Iterable[CDSFeature] = record.get_cds_features()
    shift = 0
    restricted = bool(area)
    if restricted:
        sequence = area.extract(sequence)
        shift = area.location.start
        known_cds = record.get_cds_features_within_location(area.location,
                                                            with_overlapping=True)

    # Forward strand candidates first, then those from the reverse complement
    candidates = (scan_orfs(sequence, 1, shift)
                  + scan_orfs(sequence.reverse_complement(), -1, shift))

    accepted = []
    for candidate in candidates:
        # within an area, ORFs with an open-ended boundary are ignored
        if restricted and (
                isinstance(candidate.start, (BeforePosition, AfterPosition))
                or isinstance(candidate.end, (BeforePosition, AfterPosition))):
            continue

        # a throwaway feature is only needed to run the overlap test
        probe = Feature(candidate, feature_type="dummy")
        if any(probe.overlaps_with(cds) for cds in known_cds):
            continue

        orf = create_feature_from_location(record, candidate)

        # keep it unless an area was given and does not wholly contain it
        if not restricted or orf.is_contained_by(area):
            accepted.append(orf)

    return accepted
示例#3
0
def store_promoters(promoters: Iterable[Promoter], record: Record) -> None:
    """ Add one "promoter" feature to the record for each given promoter.

        Each promoter is first described as a biopython SeqFeature carrying
        "locus_tag" and "seq" qualifiers (plus a "note" for combined
        promoters), then converted with Feature.from_biopython, flagged as
        created by antiSMASH and added to the record.

        Arguments:
            promoters: the promoters to store
            record: the record that receives the new features

        Returns:
            None
    """
    for promoter in promoters:
        # shift the start back by one (never below 0) to account for the
        # 0-indexed start of feature locations
        zero_based_start = max(0, promoter.start - 1)
        span = FeatureLocation(zero_based_start, promoter.end)
        bio_feature = SeqFeature(span, type="promoter")

        annotations = {
            # used as-is: expected to already be a list of one or two names
            "locus_tag": promoter.get_gene_names(),
            "seq": [str(promoter.seq)],
        }
        if isinstance(promoter, CombinedPromoter):
            annotations["note"] = ["bidirectional promoter"]
        bio_feature.qualifiers = annotations

        converted = Feature.from_biopython(bio_feature)
        converted.created_by_antismash = True
        record.add_feature(converted)
示例#4
0
def create_feature_from_location(record, location, counter=1, label=None) -> CDSFeature:
    """ Build a CDSFeature spanning the given location.

        The translation is obtained by asking the record to translate a
        temporary Feature placed at the same location. The new feature is
        flagged as created by antiSMASH.

        Arguments:
            record: The Record the CDSFeature will belong to, used to generate
                    the feature translation
            location: The FeatureLocation the CDSFeature will cover
            counter: An integer formatted into the default label
                     'allorfNNN' (zero-padded to three digits); ignored when
                     a label is provided
            label: The value used for the locus tag, protein id and gene name
                   of the new CDSFeature, or None to use the default label

        Returns:
            The CDSFeature created.
    """
    name = label if label is not None else 'allorf%03d' % counter

    # a short-lived stand-in feature is all the record needs for translating
    placeholder = Feature(location, feature_type="temp")
    translation = str(record.get_aa_translation_of_feature(placeholder))

    cds = CDSFeature(location, translation,
                     locus_tag=name, protein_id=name, gene=name)
    cds.created_by_antismash = True
    return cds