Exemplo n.º 1
0
def test_conversion_highlevel(path):
    """
    Test whether the high-level GenBank interface can properly read
    the locus, annotation and sequence from GenBank file and write
    these properties to a file, without data changing.

    Parameters
    ----------
    path : str
        Path of the file to be read.
        Its last two characters are passed as `format` argument to
        :func:`get_annotated_sequence()`.
    """
    suffix = path[-2:]
    gb_file = gb.GenBankFile.read(path)
    ref_locus = gb.get_locus(gb_file)
    ref_annot_seq = gb.get_annotated_sequence(gb_file, format=suffix)

    # Put the reference content into a fresh file object
    gb_file = gb.GenBankFile()
    gb.set_locus(gb_file, *ref_locus)
    gb.set_annotated_sequence(gb_file, ref_annot_seq)
    # The context manager closes the temporary file,
    # even if writing or re-reading raises an exception
    with TemporaryFile("w+") as temp:
        gb_file.write(temp)
        # Rewind, so the content that was just written is read
        temp.seek(0)
        gb_file = gb.GenBankFile.read(temp)

    test_locus = gb.get_locus(gb_file)
    test_annot_seq = gb.get_annotated_sequence(gb_file, format=suffix)
    assert test_locus == ref_locus
    assert test_annot_seq.sequence       == ref_annot_seq.sequence
    assert test_annot_seq.annotation     == ref_annot_seq.annotation
    assert test_annot_seq.sequence_start == ref_annot_seq.sequence_start
Exemplo n.º 2
0
def test_genbank_utility_gp():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenPept file.
    """
    gp_file = gb.GenBankFile.read(join(data_dir("sequence"), "bt_lysozyme.gp"))
    assert gb.get_locus(gp_file) \
        == ("AAC37312", 147, "", False, "MAM", "27-APR-1993")
    assert gb.get_definition(gp_file) == "lysozyme [Bos taurus]."
    assert gb.get_version(gp_file) == "AAC37312.1"
    assert gb.get_gi(gp_file) == 163334
    annotation = gb.get_annotation(gp_file)
    # The reference feature that is expected to be part of the annotation
    feature = seq.Feature(
        "Site",
        [seq.Location(start, stop) for start, stop in zip(
            [52,55,62,76,78,81,117,120,125],
            [53,55,62,76,78,81,117,120,126]
        )],
        {"note": "lysozyme catalytic cleft [active]", "site_type": "active"}
    )
    # A feature matches, if key and locations are equal and each
    # qualifier of the reference feature is present with the same value;
    # additional qualifiers in the parsed feature are tolerated
    in_annotation = any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            key in f.qual and f.qual[key] == val
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert in_annotation
    assert len(gb.get_sequence(gp_file, format="gp")) == 147
Exemplo n.º 3
0
def test_genbank_utility_gb():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenBank file.
    """
    gb_file = gb.GenBankFile.read(join(data_dir("sequence"), "ec_bl21.gb"))
    assert gb.get_locus(gb_file) \
        == ("CP001509", 4558953, "DNA", True, "BCT", "16-FEB-2017")
    assert gb.get_definition(gb_file) \
        == ("Escherichia coli BL21(DE3), complete genome.")
    assert gb.get_version(gb_file) == "CP001509.3"
    assert gb.get_gi(gb_file) == 296142109
    assert gb.get_db_link(gb_file) \
        == {"BioProject" : "PRJNA20713", "BioSample" : "SAMN02603478"}
    annotation = gb.get_annotation(gb_file, include_only=["CDS"])
    # The reference feature that is expected to be part of the annotation
    feature = seq.Feature(
        "CDS",
        [seq.Location(5681, 6457, seq.Location.Strand.REVERSE)],
        {"gene": "yaaA", "transl_table": "11"}
    )
    # A feature matches, if key and locations are equal and each
    # qualifier of the reference feature is present with the same value;
    # additional qualifiers in the parsed feature are tolerated
    in_annotation = any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            key in f.qual and f.qual[key] == val
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert in_annotation
    assert len(gb.get_sequence(gb_file, format="gb")) == 4558953
Exemplo n.º 4
0
                                     "protein",
                                     ret_type="gb")

# Array that will hold, for each of the genes and each of the 4 domains,
# the first and last position -> shape (number of genes, 4, 2)
# The array is initially filled with -1, as the value -1 will indicate
# that the domain does not exist in the sigma factor
domain_pos = np.full((len(genes), 4, 2), -1, dtype=int)
# Array that will hold the total sequence length of each sigma factor
seq_lengths = np.zeros(len(genes), dtype=int)
# Read the merged file containing multiple GenBank entries
multi_file = gb.MultiFile()
multi_file.read(file_name)
# Iterate over each GenBank entry
for i, gb_file in enumerate(multi_file):
    # Only the second element of the locus tuple is required here
    _, length, _, _, _, _ = gb.get_locus(gb_file)
    seq_lengths[i] = length
    annotation = gb.get_annotation(gb_file)
    # Find features, that represent a sigma factor domain
    for feature in annotation:
        if feature.key == "Region" and "note" in feature.qual \
           and "Sigma-70 factor domain" in feature.qual["note"]:
            # Extract the domain number
            # and decrement for 0-based indexing
            #
            # e.g. 'Sigma-70 factor domain-2.' => 1
            #                              ^
            # The pattern is a raw string, so that '\d' reaches the
            # regex engine instead of being an invalid string escape
            domain_index = int(
                re.findall(r"(?<=Sigma-70 factor domain-)\d+",
                           feature.qual["note"])[0]) - 1
            # Expect a single contiguous location of the domain
Exemplo n.º 5
0
import biotite.sequence.graphics as graphics
import biotite.database.entrez as entrez


# Location of the GenBank file of the plasmid
PLASMID_URL = "https://media.addgene.org/snapgene-media/" \
              "v1.6.2-0-g4b4ed87/sequences/67/17/246717/" \
              "addgene-plasmid-26094-sequence-246717.gbk"

response = requests.get(PLASMID_URL)
# Fail early with an HTTP error, instead of handing the body of an
# error response to the GenBank parser
response.raise_for_status()
# Parse the downloaded text directly from memory
gb_file = gb.GenBankFile.read(io.StringIO(response.text))
# Restrict the annotation to the listed feature keys
annotation = gb.get_annotation(gb_file,
                               include_only=[
                                   "promoter", "terminator", "protein_bind",
                                   "RBS", "CDS", "rep_origin", "primer_bind"
                               ])
# Only the second element of the locus tuple is required here
_, seq_length, _, _, _, _ = gb.get_locus(gb_file)
# AddGene stores the plasmid name in the 'KEYWORDS' field
# [0][0][0] ->
# The first (and only) 'KEYWORDS' field
# The first entry in the tuple
# The first (and only) line in the field
plasmid_name = gb_file.get_fields("KEYWORDS")[0][0][0]


def custom_feature_formatter(feature):
    # AddGene stores the feature label in the '\label' qualifier
    label = feature.qual.get("label")
    if feature.key == "promoter":
        return True, biotite.colors["dimgreen"], "black", label
    elif feature.key == "terminator":
        return True, "firebrick", "black", label