def test_genbank_utility_gp():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenPept file.

    Besides the header fields (locus, definition, version, GI) and the
    sequence length, the test requires that the annotation contains a
    ``Site`` feature with the expected locations whose qualifiers
    include all expected key/value pairs.
    """
    gp_file = gb.GenBankFile.read(
        join(data_dir("sequence"), "bt_lysozyme.gp")
    )
    assert gb.get_locus(gp_file) \
        == ("AAC37312", 147, "", False, "MAM", "27-APR-1993")
    assert gb.get_definition(gp_file) == "lysozyme [Bos taurus]."
    assert gb.get_version(gp_file) == "AAC37312.1"
    assert gb.get_gi(gp_file) == 163334

    annotation = gb.get_annotation(gp_file)
    feature = seq.Feature(
        "Site",
        [seq.Location(start, stop) for start, stop in zip(
            [52,55,62,76,78,81,117,120,125],
            [53,55,62,76,78,81,117,120,126]
        )],
        {"note": "lysozyme catalytic cleft [active]", "site_type": "active"}
    )
    # The parsed feature may carry additional qualifiers, hence only
    # require that every expected (key, value) pair is present.
    # The pair itself must be tested for membership: a tuple like
    # `(key, val in ...)` is always truthy and would check nothing.
    assert any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(item in f.qual.items() for item in feature.qual.items())
        for f in annotation
    )

    assert len(gb.get_sequence(gp_file, format="gp")) == 147
def test_genbank_utility_gb():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenBank file.

    Besides the header fields (locus, definition, version, GI, DB link)
    and the sequence length, the test requires that the ``CDS``
    annotation contains a feature with the expected location whose
    qualifiers include all expected key/value pairs.
    """
    gb_file = gb.GenBankFile.read(join(data_dir("sequence"), "ec_bl21.gb"))
    assert gb.get_locus(gb_file) \
        == ("CP001509", 4558953, "DNA", True, "BCT", "16-FEB-2017")
    assert gb.get_definition(gb_file) \
        == ("Escherichia coli BL21(DE3), complete genome.")
    assert gb.get_version(gb_file) == "CP001509.3"
    assert gb.get_gi(gb_file) == 296142109
    assert gb.get_db_link(gb_file) \
        == {"BioProject" : "PRJNA20713", "BioSample" : "SAMN02603478"}

    annotation = gb.get_annotation(gb_file, include_only=["CDS"])
    feature = seq.Feature(
        "CDS",
        [seq.Location(5681, 6457, seq.Location.Strand.REVERSE)],
        {"gene": "yaaA", "transl_table": "11"}
    )
    # The parsed feature may carry additional qualifiers, hence only
    # require that every expected (key, value) pair is present.
    # The pair itself must be tested for membership: a tuple like
    # `(key, val in ...)` is always truthy and would check nothing.
    assert any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(item in f.qual.items() for item in feature.qual.items())
        for f in annotation
    )

    assert len(gb.get_sequence(gb_file, format="gb")) == 4558953
# :class:`GenBankFile` provides a low-level interface. # In contrast, the :mod:`biotite.sequence.io.genbank` module contains # high-level functions to directly obtain useful objects from a # :class:`GenBankFile` object. import biotite.sequence.io.genbank as gb file_path = entrez.fetch("AJ311647", biotite.temp_dir(), suffix="gb", db_name="nuccore", ret_type="gb") file = gb.GenBankFile() file.read(file_path) print("Accession:", gb.get_accession(file)) print("Definition:", gb.get_definition(file)) ######################################################################## # # .. currentmodule:: biotite.sequence # # Now that we have loaded the file, we want to have a look at the # sequence features. # Therefore, we grab the annotation from the file. # An annotation is the collection of features corresponding to one # sequence (the sequence itself is not included, though). # In case of *Biotite* we can get an :class:`Annotation` object from the # :class:`GenBankFile`. # This :class:`Annotation` can be iterated in order to obtain single # :class:`Feature` objects. # Each :class:`Feature` contains 3 pieces of information: Its feature
& entrez.SimpleQuery("srcdb_swiss-prot", "Properties") # Search for the first 200 hits # More than 200 UIDs are not recommended for the EFetch service # for a single fetch uids = entrez.search(query, db_name="protein", number=200) file_name = entrez.fetch_single_file(uids, biotite.temp_file("gp"), db_name="protein", ret_type="gp") # The file contains multiple concatenated GenPept files # -> Usage of MultiFile multi_file = gb.MultiFile() multi_file.read(file_name) # Separate MultiFile into single GenBankFile instances files = [f for f in multi_file] print("Definitions:") for file in files[:20]: print(gb.get_definition(file)) print() print("Sources:") for file in files[:20]: print(gb.get_source(file)) ######################################################################## # The names of the sources are too long to be properly displayed later # on. Therefore, we write a function that creates a proper abbreviation # for a species name. def abbreviate(species): # Remove possible brackets species = species.replace("[","").replace("]","") splitted_species= species.split() return "{:}. {:}".format(splitted_species[0][0], splitted_species[1])