Пример #1
0
def test_genbank_consistency(path):
    """
    Test whether the same annotation (if reasonable) can be read from a
    GFF3 file and a GenBank file.

    Parameters
    ----------
    path : str
        File name of the GenBank file inside the ``"sequence"`` data
        directory.
        The corresponding GFF3 file is looked up under the same name
        with the last three characters replaced by ``".gff3"``.
    """
    gb_file = gb.GenBankFile.read(join(data_dir("sequence"), path))
    ref_annot = gb.get_annotation(gb_file)

    # Strip the last three characters of the file name
    # (presumably the '.gb' suffix) to derive the GFF3 file name
    gff_file = gff.GFFFile.read(
        join(data_dir("sequence"), path[:-3] + ".gff3")
    )
    test_annot = gff.get_annotation(gff_file)

    # Remove qualifiers, since they will be different
    # in GFF3 and GenBank
    ref_annot = seq.Annotation(
        [seq.Feature(feature.key, feature.locs) for feature in ref_annot]
    )
    test_annot = seq.Annotation(
        [seq.Feature(feature.key, feature.locs) for feature in test_annot]
    )
    # Iterate over the reference (GenBank) features and look each one
    # up in the GFF3 annotation:
    # Iterating over 'test_annot' and checking membership in
    # 'test_annot' would be trivially true and compare nothing
    for feature in ref_annot:
        # Only CDS, gene, intron and exon should be equal
        # in GenBank and GFF3
        if feature.key in ["CDS", "gene", "intron", "exon"]:
            try:
                assert feature in test_annot
            except AssertionError:
                # Report the offending feature before failing
                print(feature.key)
                for loc in feature.locs:
                    print(loc)
                raise
Пример #2
0
def test_genbank_utility_gp():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenPept file.
    """
    gp_file = gb.GenBankFile.read(join(data_dir("sequence"), "bt_lysozyme.gp"))
    assert gb.get_locus(gp_file) \
        == ("AAC37312", 147, "", False, "MAM", "27-APR-1993")
    assert gb.get_definition(gp_file) == "lysozyme [Bos taurus]."
    assert gb.get_version(gp_file) == "AAC37312.1"
    assert gb.get_gi(gp_file) == 163334
    annotation = gb.get_annotation(gp_file)
    # The feature that is expected to be part of the annotation
    feature = seq.Feature(
        "Site",
        [seq.Location(start, stop) for start, stop in zip(
            [52,55,62,76,78,81,117,120,125],
            [53,55,62,76,78,81,117,120,126]
        )],
        {"note": "lysozyme catalytic cleft [active]", "site_type": "active"}
    )
    # The parsed feature may carry additional qualifiers, hence only
    # require that every expected qualifier is present.
    # The '(key, val)' pair must be tested as a whole for membership:
    # '(key, val in ...)' would build a non-empty tuple,
    # which is always truthy
    in_annotation = any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            (key, val) in f.qual.items()
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert in_annotation
    assert len(gb.get_sequence(gp_file, format="gp")) == 147
Пример #3
0
def test_genbank_utility_gb():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenBank file.
    """
    gb_file = gb.GenBankFile.read(join(data_dir("sequence"), "ec_bl21.gb"))
    assert gb.get_locus(gb_file) \
        == ("CP001509", 4558953, "DNA", True, "BCT", "16-FEB-2017")
    assert gb.get_definition(gb_file) \
        == ("Escherichia coli BL21(DE3), complete genome.")
    assert gb.get_version(gb_file) == "CP001509.3"
    assert gb.get_gi(gb_file) == 296142109
    assert gb.get_db_link(gb_file) \
        == {"BioProject" : "PRJNA20713", "BioSample" : "SAMN02603478"}
    annotation = gb.get_annotation(gb_file, include_only=["CDS"])
    # The feature that is expected to be part of the annotation
    feature = seq.Feature(
        "CDS",
        [seq.Location(5681, 6457, seq.Location.Strand.REVERSE)],
        {"gene": "yaaA", "transl_table": "11"}
    )
    # The parsed feature may carry additional qualifiers, hence only
    # require that every expected qualifier is present.
    # The '(key, val)' pair must be tested as a whole for membership:
    # '(key, val in ...)' would build a non-empty tuple,
    # which is always truthy
    in_annotation = any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            (key, val) in f.qual.items()
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert in_annotation
    assert len(gb.get_sequence(gb_file, format="gb")) == 4558953
Пример #4
0
 def _add_sec_str(annotation, first, last, str_type):
     """
     Add a 'SecStr' feature spanning `first` to `last` to `annotation`.

     The one-letter code ``"a"`` is stored as ``"helix"`` and ``"b"``
     as ``"sheet"`` in the ``"sec_str_type"`` qualifier.
     Any other code (i.e. coil) adds nothing.
     """
     sec_str_names = {"a": "helix", "b": "sheet"}
     sec_str_name = sec_str_names.get(str_type)
     if sec_str_name is None:
         # Coil elements are not annotated
         return
     location = seq.Location(first, last)
     annotation.add_feature(
         seq.Feature("SecStr", [location], {"sec_str_type": sec_str_name})
     )
Пример #5
0
def test_feature_without_id():
    """
    Setting an annotation must fail with a :class:`ValueError`, if it
    contains a feature that lacks an 'ID' qualifier but has more than
    one location, as such a feature would span multiple entries in the
    GFF3 file.
    """
    # Two locations, but no 'ID' qualifier
    locations = [seq.Location(1,2), seq.Location(4,5)]
    qualifiers = {"some" : "qualifiers"}
    multi_loc_feature = seq.Feature(
        key="CDS", locs=locations, qual=qualifiers
    )
    annot = seq.Annotation([multi_loc_feature])

    gff_file = gff.GFFFile()
    with pytest.raises(ValueError):
        gff.set_annotation(gff_file, annot)
Пример #6
0
                y,
                dx,
                dy,
                self._tail_width * bbox.height,
                self._head_width * bbox.height,
                # Create head with 90 degrees tip
                # -> head width/length ratio = 1/2
                head_ratio=0.5,
                draw_head=draw_head,
                color=biotite.colors["orange"],
                linewidth=0))


# Test our drawing functions with an example annotation:
# one 'SecStr' feature marked as helix (positions 10 to 40)
# and one marked as sheet (positions 60 to 90)
annotation = seq.Annotation([
    seq.Feature("SecStr", [seq.Location(10, 40)], {"sec_str_type": "helix"}),
    seq.Feature("SecStr", [seq.Location(60, 90)], {"sec_str_type": "sheet"}),
])

# A wide and flat figure with a single axes for the feature map
fig = plt.figure(figsize=(8.0, 0.8))
ax = fig.add_subplot(111)
graphics.plot_feature_map(
    ax,
    annotation,
    multi_line=False,
    # The plotted range covers both example features
    loc_range=(1, 100),
    # Register our drawing functions
    feature_plotters=[HelixPlotter(), SheetPlotter()])
fig.tight_layout()

########################################################################
Пример #7
0
# At this point the DataFrame 'ss_segments' also contains 'L' linkers.
# Create a new DataFrame that stores only the segments relevant for
# plotting

ss_segments_plot = ss_segments.query('sec_str_type != "L"')

#%%

# Convert each remaining segment into a 'SecStr' feature.
# The unpacking below requires each row tuple to consist of the index
# followed by exactly three values (start, end and segment type)
annotation = seq.Annotation()
for _, start_aa, end_aa, ss_type in ss_segments_plot.itertuples():
    # Translate the one-letter codes into the qualifier values;
    # any other code is passed on unchanged
    if ss_type == "H":
        ss_type = "helix"
    elif ss_type == "S":
        ss_type = "sheet"

    feature = seq.Feature("SecStr", [seq.Location(start_aa, end_aa)],
                          {"sec_str_type": ss_type})
    annotation.add_feature(feature)

#%%


class HelixPlotter(graphics.FeaturePlotter):
    """
    Feature plotter for 'SecStr' features whose ``"sec_str_type"``
    qualifier is ``"helix"``.
    """

    def __init__(self):
        # This plotter keeps no state of its own
        pass

    # Check whether this class is applicable for drawing a feature
    def matches(self, feature):
        """
        Return ``True`` if `feature` has the key ``"SecStr"`` and its
        ``"sec_str_type"`` qualifier is ``"helix"``.
        """
        if feature.key == "SecStr":
            # The qualifier is optional, so check for its presence
            # before reading it
            if "sec_str_type" in feature.qual:
                if feature.qual["sec_str_type"] == "helix":
                    return True
This script shows how :class:`Feature` objects are displayed in a
plasmid map by using a custom 'toy' :class:`Annotation`. 
"""

# Code source: Patrick Kunzmann
# License: BSD 3 clause

import matplotlib.pyplot as plt
import numpy as np
import biotite.sequence as seq
import biotite.sequence.io.genbank as gb
import biotite.sequence.graphics as graphics
import biotite.database.entrez as entrez

annotation = seq.Annotation([
    seq.Feature("source", [seq.Location(0, 1500)],
                {"organism": "Escherichia coli"}),

    # Ori
    seq.Feature("rep_origin",
                [seq.Location(600, 700, seq.Location.Strand.REVERSE)], {
                    "regulatory_class": "promoter",
                    "note": "MyProm"
                }),

    # Promoter
    seq.Feature("regulatory", [seq.Location(1000, 1060)], {
        "regulatory_class": "promoter",
        "note": "MyProm"
    }),
    seq.Feature("protein_bind", [seq.Location(1025, 1045)], {"note": "repr"}),
Пример #9
0
def ss_csv_to_annotation(csv_path: str):
    """
    Create an annotation of secondary structure elements from a CSV
    file containing a per-residue secondary structure assignment.

    Consecutive residues with the same secondary structure code are
    merged into one segment and each segment, except for ``"L"``
    linkers, is added as 'SecStr' feature.

    Parameters
    ----------
    csv_path : str
        Path to the CSV file.
        Each row must consist of exactly two columns:
        the residue number followed by the secondary structure code
        of that residue.

    Returns
    -------
    annotation : Annotation
        One 'SecStr' feature per segment.
        The ``"sec_str_type"`` qualifier is ``"helix"`` for the code
        ``"H"`` and ``"sheet"`` for the code ``"S"``.
        Any other code is stored unchanged.
    """
    exported_ss = pd.read_csv(csv_path)

    # The first row opens the first segment,
    # its end is not known at this point
    start_aa, last_ss = exported_ss.iloc[0]
    # The residue number of the last row is required
    # for closing the last segment
    seq_end_aa, _ = exported_ss.iloc[-1]

    # Collect the segments as '(start, end, code)' tuples:
    # Appending to a plain list avoids the quadratic runtime of growing
    # a DataFrame row by row and does not rely on 'DataFrame.append()',
    # which is not available anymore in recent pandas versions
    segments = []
    for _, aa, ss in exported_ss.itertuples():
        # Only when a new segment is detected, the previous segment
        # is recorded: It ends one residue before the current one
        if ss != last_ss:
            segments.append((start_aa, aa - 1, last_ss))
            start_aa = aa
        # The last segment is not followed by a change of the code,
        # hence it is closed explicitly
        if aa == seq_end_aa:
            segments.append((start_aa, aa, ss))
        last_ss = ss

    sec_str_names = {"H": "helix", "S": "sheet"}
    annotation = seq.Annotation()
    for first_aa, last_aa, ss_type in segments:
        # 'L' linkers are not relevant for plotting
        if ss_type == "L":
            continue
        feature = seq.Feature(
            "SecStr",
            [seq.Location(first_aa, last_aa)],
            {"sec_str_type": sec_str_names.get(ss_type, ss_type)}
        )
        annotation.add_feature(feature)

    return annotation