def from_biopython(bio_feature: SeqFeature, feature: "CDSFeature" = None,  # type: ignore
                   leftovers: Optional[Dict] = None, record: Any = None) -> "CDSFeature":
    """ Builds a CDSFeature from a biopython SeqFeature.

        Arguments:
            bio_feature: the SeqFeature to convert
            feature: ignored, a new CDSFeature is always constructed here
            leftovers: the qualifiers not yet consumed; if None, a copy of the
                       qualifiers of bio_feature is made and used instead.
                       A dictionary passed in is consumed in place.
            record: optional, supplies the default translation table and is
                    handed on to the translation validation

        Returns:
            the newly constructed CDSFeature

        Raises:
            SecmetInvalidInputError: if the location fails verification or if
                                     validating the translation raises a ValueError
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)
    location = bio_feature.location

    # mandatory qualifiers: an explicit qualifier beats the record's table,
    # which in turn beats the default of 1
    table = record.transl_table if record else 1
    if "transl_table" in leftovers:
        table = int(leftovers.pop("transl_table")[0])

    # semi-optional qualifiers, at least one is needed to name the feature
    protein_id, locus_tag, gene = (
        leftovers.pop(key, [None])[0] for key in ("protein_id", "locus_tag", "gene")
    )
    if not any((gene, protein_id, locus_tag)):
        # nothing usable was supplied, so derive a gene name from the coordinates
        is_pseudo = "pseudo" in leftovers or "pseudogene" in leftovers
        prefix = "pseudo" if is_pseudo else "cds"
        gene = "%s%s_%s" % (prefix, location.start, location.end)
    name = locus_tag or protein_id or gene

    try:
        _verify_location(location)
    except Exception as err:
        message = "invalid location for %s: %s" % (name, str(err))
        raise SecmetInvalidInputError(message) from err

    supplied_translation = leftovers.pop("translation", [""])[0]
    try:
        translation = _ensure_valid_translation(supplied_translation, location, table, record)
    except ValueError as err:
        raise SecmetInvalidInputError(str(err) + ": %s" % name) from err

    feature = CDSFeature(location, translation, gene=gene, locus_tag=locus_tag,
                         protein_id=protein_id, translation_table=table)

    # optional qualifiers
    feature.product = leftovers.pop("product", [""])[0]
    sec_met_qualifier = leftovers.pop("sec_met_domain", None)
    if sec_met_qualifier:
        feature.sec_met = SecMetQualifier.from_biopython(sec_met_qualifier)
    function_qualifier = leftovers.pop("gene_functions", [])
    if function_qualifier:
        feature.gene_functions.add_from_qualifier(function_qualifier)
    feature.nrps_pks.add_from_qualifier(leftovers.pop("NRPS_PKS", []))

    # whatever remains is handled by the parent class
    super(CDSFeature, feature).from_biopython(bio_feature, feature=feature, leftovers=leftovers)
    return feature
def from_biopython(bio_feature: SeqFeature, feature: "CDSFeature" = None,  # type: ignore
                   leftovers: Optional[Dict] = None, record: Any = None) -> "CDSFeature":
    """ Builds a CDSFeature from a biopython SeqFeature.

        A translation supplied in the qualifiers is used if it contains only
        valid characters and fits within the location, otherwise (or if none
        was supplied) the translation is generated from the record.

        Arguments:
            bio_feature: the SeqFeature to convert
            feature: ignored, a new CDSFeature is always constructed here
            leftovers: the qualifiers not yet consumed; if None, a copy of the
                       qualifiers of bio_feature is made and used instead.
                       A dictionary passed in is consumed in place.
            record: optional, supplies the default translation table and is
                    required to generate a missing or discarded translation

        Returns:
            the newly constructed CDSFeature

        Raises:
            SecmetInvalidInputError: if the location fails verification, if the
                                     supplied translation is longer than the
                                     location allows, if a translation has to be
                                     generated but no record was given, or if the
                                     record sequence ends before the feature does
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)
    # grab mandatory qualifiers
    transl_table = 1
    if record:
        transl_table = record.transl_table
    if "transl_table" in leftovers:
        transl_table = int(leftovers.pop("transl_table")[0])
    translation = leftovers.pop("translation", [""])[0]

    # semi-optional qualifiers
    protein_id = leftovers.pop("protein_id", [None])[0]
    locus_tag = leftovers.pop("locus_tag", [None])[0]
    gene = leftovers.pop("gene", [None])[0]
    if not (gene or protein_id or locus_tag):
        # nothing usable was supplied, so derive a gene name from the coordinates
        if "pseudo" in leftovers or "pseudogene" in leftovers:
            gene = "pseudo%s_%s"
        else:
            gene = "cds%s_%s"
        gene = gene % (bio_feature.location.start, bio_feature.location.end)

    # a single identifier for every diagnostic below, so that warnings and
    # errors about the same feature always refer to it by the same name
    name = locus_tag or protein_id or gene

    try:
        _verify_location(bio_feature.location)
    except Exception as err:
        message = "invalid location for %s: %s" % (name, str(err))
        raise SecmetInvalidInputError(message) from err

    # ensure translation is valid if it exists
    if translation:
        invalid = set(translation) - _VALID_TRANSLATION_CHARS
        if invalid:
            logging.warning("Regenerating translation for CDS %s (at %s) containing invalid characters: %s",
                            name, bio_feature.location, invalid)
            # discarding it here forces regeneration from the record below
            translation = ""
    # ensure that the translation fits
    if not _is_valid_translation_length(translation, bio_feature.location):
        message = "translation longer than location allows: %s > %s" % (
            len(translation) * 3, len(bio_feature.location))
        raise SecmetInvalidInputError(message + ": %s" % name)
    # finally, generate the translation if it doesn't exist
    if not translation:
        if not record:
            message = "no translation in CDS and no record to generate it with"
            raise SecmetInvalidInputError(message + ": %s" % name)
        if bio_feature.location.end > len(record.seq):
            raise SecmetInvalidInputError("feature missing translation and sequence too short: %s" % name)
        translation = record.get_aa_translation_from_location(bio_feature.location, transl_table)

    # sanity check on the final translation, whether supplied or generated
    assert _is_valid_translation_length(translation, bio_feature.location)

    feature = CDSFeature(bio_feature.location, translation, gene=gene,
                         locus_tag=locus_tag, protein_id=protein_id,
                         translation_table=transl_table)

    # grab optional qualifiers
    feature.product = leftovers.pop("product", [""])[0]
    sec_met = leftovers.pop("sec_met_domain", None)
    if sec_met:
        feature.sec_met = SecMetQualifier.from_biopython(sec_met)
    gene_functions = leftovers.pop("gene_functions", [])
    if gene_functions:
        feature.gene_functions.add_from_qualifier(gene_functions)

    # grab parent optional qualifiers
    super(CDSFeature, feature).from_biopython(bio_feature, feature=feature, leftovers=leftovers)

    return feature