示例#1
0
def test_genbank_utility_gp():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenPept file.
    """
    gp_file = gb.GenBankFile.read(join(data_dir("sequence"), "bt_lysozyme.gp"))
    assert gb.get_locus(gp_file) \
        == ("AAC37312", 147, "", False, "MAM", "27-APR-1993")
    assert gb.get_definition(gp_file) == "lysozyme [Bos taurus]."
    assert gb.get_version(gp_file) == "AAC37312.1"
    assert gb.get_gi(gp_file) == 163334
    annotation = gb.get_annotation(gp_file)
    # Reference feature that is expected to be part of the annotation
    feature = seq.Feature(
        "Site",
        [seq.Location(start, stop) for start, stop in zip(
            [52,55,62,76,78,81,117,120,125],
            [53,55,62,76,78,81,117,120,126]
        )],
        {"note": "lysozyme catalytic cleft [active]", "site_type": "active"}
    )
    # A parsed feature matches the reference if key and locations are
    # equal and every reference qualifier is present with the same value.
    # The parsed feature may carry additional qualifiers.
    # Note the parentheses: '(key, val) in items' tests the pair,
    # whereas '(key, val in items)' would build an always-truthy tuple.
    in_annotation = any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            (key, val) in f.qual.items()
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert in_annotation
    assert len(gb.get_sequence(gp_file, format="gp")) == 147
示例#2
0
def test_genbank_utility_gb():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenBank file.
    """
    gb_file = gb.GenBankFile.read(join(data_dir("sequence"), "ec_bl21.gb"))
    assert gb.get_locus(gb_file) \
        == ("CP001509", 4558953, "DNA", True, "BCT", "16-FEB-2017")
    assert gb.get_definition(gb_file) \
        == ("Escherichia coli BL21(DE3), complete genome.")
    assert gb.get_version(gb_file) == "CP001509.3"
    assert gb.get_gi(gb_file) == 296142109
    assert gb.get_db_link(gb_file) \
        == {"BioProject" : "PRJNA20713", "BioSample" : "SAMN02603478"}
    annotation = gb.get_annotation(gb_file, include_only=["CDS"])
    # Reference feature that is expected to be part of the annotation
    feature = seq.Feature(
        "CDS",
        [seq.Location(5681, 6457, seq.Location.Strand.REVERSE)],
        {"gene": "yaaA", "transl_table": "11"}
    )
    # A parsed feature matches the reference if key and locations are
    # equal and every reference qualifier is present with the same value.
    # The parsed feature may carry additional qualifiers.
    # Note the parentheses: '(key, val) in items' tests the pair,
    # whereas '(key, val in items)' would build an always-truthy tuple.
    in_annotation = any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            (key, val) in f.qual.items()
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert in_annotation
    assert len(gb.get_sequence(gb_file, format="gb")) == 4558953
示例#3
0
# :class:`GenBankFile` provides a low-level interface.
# In contrast, the :mod:`biotite.sequence.io.genbank` module contains
# high-level functions to directly obtain useful objects from a
# :class:`GenBankFile` object.

import biotite.sequence.io.genbank as gb

# Request the entry with the ID "AJ311647" from the "nuccore" database
# in "gb" format, using Biotite's temporary directory as target.
# The returned value is used below as the path of the file to read.
file_path = entrez.fetch("AJ311647",
                         biotite.temp_dir(),
                         suffix="gb",
                         db_name="nuccore",
                         ret_type="gb")
# Create an empty GenBankFile object and fill it from the fetched file
file = gb.GenBankFile()
file.read(file_path)
# Use the high-level functions of the genbank module to print the
# accession and the definition of the entry
print("Accession:", gb.get_accession(file))
print("Definition:", gb.get_definition(file))

########################################################################
#
# .. currentmodule:: biotite.sequence
#
# Now that we have loaded the file, we want to have a look at the
# sequence features.
# Therefore, we grab the annotation from the file.
# An annotation is the collection of features corresponding to one
# sequence (the sequence itself is not included, though).
# In case of *Biotite* we can get an :class:`Annotation` object from the
# :class:`GenBankFile`.
# This :class:`Annotation` can be iterated in order to obtain single
# :class:`Feature` objects.
# Each :class:`Feature` contains 3 pieces of information: Its feature
示例#4
0
        & entrez.SimpleQuery("srcdb_swiss-prot", "Properties")
# Search the "protein" database with the query and request at most
# 200 hits:
# More than 200 UIDs are not recommended for the EFetch service
# for a single fetch
uids = entrez.search(query, db_name="protein", number=200)
# Fetch the entries for all UIDs in "gp" (GenPept) format
# into a single temporary file
file_name = entrez.fetch_single_file(uids, biotite.temp_file("gp"),
                              db_name="protein", ret_type="gp")
# The file contains multiple concatenated GenPept files,
# hence it is read as MultiFile
multi_file = gb.MultiFile()
multi_file.read(file_name)
# Iterate over the MultiFile to separate it
# into single GenBankFile instances
files = [f for f in multi_file]
# Print the definition of the first 20 entries (at most)
print("Definitions:")
for file in files[:20]:
    print(gb.get_definition(file))
# Empty line to separate both listings
print()
# Print the source of the same first 20 entries (at most)
print("Sources:")
for file in files[:20]:
    print(gb.get_source(file))

########################################################################
# The names of the sources are too long to be properly displayed later
# on. Therefore, we write a function that creates a proper abbreviation
# for a species name.

def abbreviate(species):
    """
    Create an abbreviation for a species name.

    The first word is reduced to its initial letter followed by a dot,
    the second word is kept and all further words are dropped,
    e.g. ``"[Bos taurus]"`` becomes ``"B. taurus"``.

    Parameters
    ----------
    species : str
        The species name.
        Square brackets in the name are ignored.

    Returns
    -------
    abbreviation : str
        The abbreviated name.
        If the name consists of less than two words, there is nothing
        to abbreviate and the name is returned without brackets
        instead.
    """
    # Remove possible brackets
    words = species.replace("[", "").replace("]", "").split()
    if len(words) < 2:
        # Avoid an IndexError for empty or single-word names
        return " ".join(words)
    genus, epithet = words[0], words[1]
    return "{:}. {:}".format(genus[0], epithet)