def find_all_orfs(record: Record, cluster: Optional[Cluster] = None) -> List[CDSFeature]:
    """ Collects every ORF (of at least 60 bases) that does not overlap with
        an already existing CDS feature.
        The search can (and should) be restricted to a single cluster.

        Arguments:
            record: the record to search
            cluster: the specific Cluster to search within, or None

        Returns:
            a list of CDSFeatures, one for each ORF
    """
    # by default the whole record is searched and compared against all of its CDSs
    search_start = 0
    search_seq = record.seq
    known_cds = record.get_cds_features()
    if cluster:
        # narrow both the sequence and the CDSs compared against to the cluster
        search_seq = record.seq[cluster.location.start:cluster.location.end]
        search_start = cluster.location.start
        known_cds = tuple(cluster.cds_children)

    # scan both strands, forward strand hits first
    candidates = scan_orfs(search_seq, 1, search_start)
    candidates = candidates + scan_orfs(search_seq.reverse_complement(), -1, search_start)

    fuzzy_positions = (BeforePosition, AfterPosition)
    accepted = []
    for candidate in candidates:
        # within a cluster, ORFs with an open-ended start or end are dropped
        if cluster and (isinstance(candidate.start, fuzzy_positions)
                        or isinstance(candidate.end, fuzzy_positions)):
            continue

        # anything overlapping a known CDS is dropped
        probe = Feature(candidate, feature_type="dummy")
        if any(probe.overlaps_with(cds) for cds in known_cds):
            continue

        # the counter passed along is the 1-based index of the next accepted ORF
        orf = create_feature_from_location(record, candidate, len(accepted) + 1)

        # only keep ORFs wholly contained in the cluster, if one was given
        if not cluster or orf.is_contained_by(cluster):
            accepted.append(orf)

    return accepted
def find_all_orfs(record: Record, area: Optional[CDSCollection] = None) -> List[CDSFeature]:
    """ Collects every ORF (of at least 60 bases) that does not overlap with
        an already existing CDS feature.
        The search can (and should) be restricted to a specific section of
        the record.

        Arguments:
            record: the record to search
            area: the specific CDSCollection to search within, or None

        Returns:
            a list of CDSFeatures, one for each ORF
    """
    # by default the whole record is searched and compared against all of its CDSs
    search_start = 0
    search_seq = record.seq
    known_cds: Iterable[CDSFeature] = record.get_cds_features()
    if area:
        # narrow the sequence to the area and only compare against CDSs
        # within, or overlapping, the area's location
        search_seq = area.extract(search_seq)
        search_start = area.location.start
        known_cds = record.get_cds_features_within_location(area.location,
                                                            with_overlapping=True)

    # scan both strands, forward strand hits first
    candidates = scan_orfs(search_seq, 1, search_start)
    candidates = candidates + scan_orfs(search_seq.reverse_complement(), -1, search_start)

    fuzzy_positions = (BeforePosition, AfterPosition)
    accepted = []
    for candidate in candidates:
        # within an area, ORFs with an open-ended start or end are dropped
        if area and (isinstance(candidate.start, fuzzy_positions)
                     or isinstance(candidate.end, fuzzy_positions)):
            continue

        # anything overlapping a known CDS is dropped
        probe = Feature(candidate, feature_type="dummy")
        if any(probe.overlaps_with(cds) for cds in known_cds):
            continue

        orf = create_feature_from_location(record, candidate)

        # only keep ORFs wholly contained in the area, if one was given
        if not area or orf.is_contained_by(area):
            accepted.append(orf)

    return accepted
def store_promoters(promoters: Iterable[Promoter], record: Record) -> None:
    """ Adds a "promoter" feature to the record for each of the given promoters.

        Arguments:
            promoters: the promoters to store
            record: the record to add the resulting features to

        Returns:
            None
    """
    for promoter in promoters:
        # the start is shifted down by one to account for the 0-indexed
        # start location, but never below zero
        zero_based_start = max(0, promoter.start - 1)
        location = FeatureLocation(zero_based_start, promoter.end)
        bio_feature = SeqFeature(location, type="promoter")

        qualifiers = {
            # per the original note: already a list with one or two elements
            "locus_tag": promoter.get_gene_names(),
            "seq": [str(promoter.seq)],
        }
        if isinstance(promoter, CombinedPromoter):
            qualifiers["note"] = ["bidirectional promoter"]
        bio_feature.qualifiers = qualifiers

        # convert to the secmet equivalent and mark it as antiSMASH-generated
        converted = Feature.from_biopython(bio_feature)
        converted.created_by_antismash = True
        record.add_feature(converted)
def create_feature_from_location(record, location, counter=1, label=None) -> CDSFeature:
    """ Builds a CDS feature that covers the given location.

        Arguments:
            record: the Record the CDSFeature will belong to, used to generate
                    the translation of the feature
            location: the FeatureLocation the CDSFeature will cover
            counter: an integer formatted into the default label 'allorfNNN'
                     (zero-padded to three digits), only used when no label
                     is provided
            label: the value to use as locus tag, protein id, and gene name
                   of the new CDSFeature

        Returns:
            the newly created CDSFeature, flagged as created by antiSMASH
    """
    name = 'allorf%03d' % counter if label is None else label

    # a throwaway feature is needed so the record can translate the location
    placeholder = Feature(location, feature_type="temp")
    translation = str(record.get_aa_translation_of_feature(placeholder))

    cds = CDSFeature(location, translation,
                     locus_tag=name, protein_id=name, gene=name)
    cds.created_by_antismash = True
    return cds