Пример #1
0
def test_genbank_consistency(path):
    """
    Test whether the same annotation (if reasonable) can be read from a
    GFF3 file and a GenBank file.

    Parameters
    ----------
    path : str
        Name of the GenBank file inside the ``sequence`` data directory.
        The GFF3 counterpart is looked up under the same name with the
        last three characters replaced by ``.gff3``.
    """
    gb_file = gb.GenBankFile.read(join(data_dir("sequence"), path))
    ref_annot = gb.get_annotation(gb_file)

    gff_file = gff.GFFFile.read(join(data_dir("sequence"), path[:-3] + ".gff3"))
    test_annot = gff.get_annotation(gff_file)

    # Remove qualifiers, since they will be different
    # in GFF3 and GenBank
    ref_annot = seq.Annotation(
        [seq.Feature(feature.key, feature.locs) for feature in ref_annot]
    )
    test_annot = seq.Annotation(
        [seq.Feature(feature.key, feature.locs) for feature in test_annot]
    )
    # Iterate over the reference (GenBank) features and look them up in
    # the GFF3 annotation; iterating over 'test_annot' itself would make
    # the membership assertion trivially true
    for feature in ref_annot:
        # Only CDS, gene, intron and exon should be equal
        # in GenBank and GFF3
        if feature.key in ["CDS", "gene", "intron", "exon"]:
            try:
                assert feature in test_annot
            except AssertionError:
                # Show the offending feature before re-raising,
                # to make the failure easier to diagnose
                print(feature.key)
                for loc in feature.locs:
                    print(loc)
                raise
Пример #2
0
def visualize_secondary_structure(sse, first_id):
    """
    Plot the helices and sheets of a DSSP secondary structure
    assignment as a feature map and show the figure.

    Parameters
    ----------
    sse : sequence of str
        One DSSP code (``I S H E G B T C``) per residue.
        The sequence is not modified.
    first_id : int
        Residue number of the first element in `sse`; it is added to
        every index to obtain the feature locations.

    Raises
    ------
    KeyError
        If `sse` contains a code that is not one of the DSSP codes
        listed above.
    """
    dssp_to_abc = {
        "I": "c",
        "S": "c",
        "H": "a",
        "E": "b",
        "G": "c",
        "B": "b",
        "T": "c",
        "C": "c"
    }

    # Translate into a new list instead of overwriting the caller's
    # data: an in-place translation makes a second call on the same
    # input fail, because the translated codes are not DSSP codes
    abc_sse = [dssp_to_abc[code] for code in sse]

    def _add_sec_str(annotation, first, last, str_type):
        # Add a helix/sheet feature spanning 'first' to 'last'
        # (both inclusive); coil segments are not annotated
        if str_type == "a":
            str_type = "helix"
        elif str_type == "b":
            str_type = "sheet"
        else:
            # coil
            return
        feature = seq.Feature("SecStr", [seq.Location(first, last)],
                              {"sec_str_type": str_type})
        annotation.add_feature(feature)

    # Find the intervals for each secondary structure element
    # and add to annotation
    annotation = seq.Annotation()
    n_res = len(abc_sse)
    run_start = 0
    # 'i' is the index one past the end of the current run;
    # i == n_res closes the final run, so that its last residue
    # is included and an empty input simply adds no feature
    for i in range(1, n_res + 1):
        if i == n_res or abc_sse[i] != abc_sse[run_start]:
            _add_sec_str(annotation, run_start + first_id,
                         i - 1 + first_id, abc_sse[run_start])
            run_start = i

    fig = plt.figure(figsize=(8.0, 3.0))
    ax = fig.add_subplot(111)
    graphics.plot_feature_map(
        ax,
        annotation,
        symbols_per_line=150,
        loc_range=(first_id, first_id + n_res),
        show_numbers=True,
        show_line_position=True,
        feature_plotters=[HelixPlotter(), SheetPlotter()])
    fig.tight_layout()
    plt.show()
Пример #3
0
def test_feature_without_id():
    """
    Writing a feature that lacks an 'ID' qualifier must fail with a
    ``ValueError`` when the feature has several locations, as it would
    be spread over several entries in the GFF3 file.
    """
    # Two separate locations -> two GFF3 entries for the same feature
    multi_location_feature = seq.Feature(
        key="CDS",
        locs=[seq.Location(1, 2), seq.Location(4, 5)],
        qual={"some": "qualifiers"},
    )
    annot = seq.Annotation([multi_location_feature])

    gff_file = gff.GFFFile()
    with pytest.raises(ValueError):
        gff.set_annotation(gff_file, annot)
Пример #4
0
def make_feature_maps(gene):
    """
    Download the GenBank entry for `gene` and save one feature map
    image per feature key.

    The entry is fetched from the ``nuccore`` database into the
    temporary directory.
    For every distinct feature key in the annotation a PNG file named
    ``<key>.png`` is written to ``app/static/images`` below the current
    working directory, and ``session['valid_gene']`` is set to ``True``.
    If fetching or parsing fails, an error message is flashed instead
    and nothing is plotted.

    Parameters
    ----------
    gene : str
        Identifier passed to ``entrez.fetch``.

    Returns
    -------
    None
    """
    try:
        find_id = entrez.fetch(gene,
                               gettempdir(),
                               suffix="gb",
                               db_name="nuccore",
                               ret_type="gb")
        read_file = gb.GenBankFile.read(find_id)
        file_annotation = gb.get_annotation(read_file)
    except Exception:
        # 'Exception' instead of a bare 'except', so that
        # KeyboardInterrupt/SystemExit are not swallowed
        flash('The entered gene could not found. Please try again.', 'error')
        return None

    # Without a 'source' feature the range stays None
    # NOTE(review): None is expected to make 'plot_feature_map' fall
    # back to the range of the given annotation -- confirm against the
    # installed biotite version
    loc_range = None
    # Group the features by key in a single pass instead of
    # filtering the whole annotation once per key
    features_by_key = {}
    for feature in file_annotation:
        features_by_key.setdefault(feature.key, []).append(feature)
        if feature.key == "source":
            # loc_range has exclusive stop
            loc = list(feature.locs)[0]
            loc_range = (loc.first, loc.last + 1)

    pwd = os.getcwd()

    for key in sorted(features_by_key):
        fig, ax = plt.subplots(figsize=(8.0, 2.0))
        graphics.plot_feature_map(ax,
                                  seq.Annotation(features_by_key[key]),
                                  multi_line=False,
                                  loc_range=loc_range,
                                  show_line_position=True)

        ax.set_title('This plot is for {} features'.format(key))
        fig.savefig(pwd + '/app/static/images/{}.png'.format(key), dpi=300)
        # Release the figure; otherwise every request leaves
        # its figures registered in pyplot
        plt.close(fig)
        session['valid_gene'] = True

    return None
Пример #5
0
def visualize_secondary_structure(sse, first_id, linesize=200):
    """
    Draw the helices and sheets of a secondary structure assignment
    into a new feature map figure.

    The figure is neither shown nor returned.

    Parameters
    ----------
    sse : array-like of str
        One secondary structure code per residue: ``"a"`` for helix,
        ``"b"`` for sheet, anything else is treated as coil.
        Must provide a ``shape`` attribute.
    first_id : int
        Residue number of the first element in `sse`; it is added to
        every index to obtain the feature locations.
    linesize : int, optional
        Number of symbols per line in the feature map.
    """
    length = sse.shape[0]

    def _add_sec_str(annotation, first, last, str_type):
        # Add a helix/sheet feature spanning 'first' to 'last'
        # (both inclusive); coil segments are not annotated
        if str_type == "a":
            str_type = "helix"
        elif str_type == "b":
            str_type = "sheet"
        else:
            # coil
            return
        feature = seq.Feature("SecStr", [seq.Location(first, last)],
                              {"sec_str_type": str_type})
        annotation.add_feature(feature)

    # Find the intervals for each secondary structure element
    # and add to annotation
    annotation = seq.Annotation()
    run_start = 0
    # 'i' is the index one past the end of the current run;
    # i == length closes the final run, so that its last residue
    # is included and an empty input simply adds no feature
    for i in range(1, length + 1):
        if i == length or sse[i] != sse[run_start]:
            _add_sec_str(annotation, run_start + first_id,
                         i - 1 + first_id, sse[run_start])
            run_start = i

    fig = plt.figure(figsize=(8.0, 3.0))
    ax = fig.add_subplot(111)
    # NOTE(review): the plotted range always starts at 1, while the
    # feature locations are shifted by 'first_id' -- confirm that this
    # is intended for first_id != 1
    graphics.plot_feature_map(
        ax,
        annotation,
        symbols_per_line=linesize,
        loc_range=(1, length + 1),
        show_numbers=True,
        show_line_position=True,
        feature_plotters=[HelixPlotter(), SheetPlotter()])
    fig.tight_layout()
Пример #6
0
def visualize_secondary_structure(sse, first_id):
    """
    Create a feature map of the helices and sheets of a secondary
    structure assignment.

    Parameters
    ----------
    sse : sequence of str
        One secondary structure code per residue: ``"a"`` for helix,
        ``"b"`` for sheet, anything else is treated as coil.
    first_id : int
        Residue number of the first element in `sse`; it is added to
        every index to obtain the feature locations.

    Returns
    -------
    The result of ``FeatureMap.generate()``.
    """
    def _add_sec_str(annotation, first, last, str_type):
        # Add a helix/sheet feature spanning 'first' to 'last'
        # (both inclusive); coil segments are not annotated
        if str_type == "a":
            str_type = "helix"
        elif str_type == "b":
            str_type = "sheet"
        else:
            # coil
            return
        feature = seq.Feature("SecStr", [seq.Location(first, last)],
                              {"sec_str_type": str_type})
        annotation.add_feature(feature)

    # Find the intervals for each secondary structure element
    # and add to annotation
    annotation = seq.Annotation()
    n_res = len(sse)
    run_start = 0
    # 'i' is the index one past the end of the current run;
    # i == n_res closes the final run, so that its last residue
    # is included and an empty input simply adds no feature
    for i in range(1, n_res + 1):
        if i == n_res or sse[i] != sse[run_start]:
            _add_sec_str(annotation, run_start + first_id,
                         i - 1 + first_id, sse[run_start])
            run_start = i

    # NOTE(review): 'length' and 'draw_secondary_strucure' are not
    # defined in this function; presumably they are module-level names
    # (total sequence length and the drawing callback) -- confirm
    feature_map = graphics.FeatureMap(annotation,
                                      line_length=150,
                                      loc_range=(1, length + 1))
    feature_map.add_location_numbers(size=50)
    feature_map.drawfunc["SecStr"] = draw_secondary_strucure
    return feature_map.generate()
Пример #7
0
########################################################################
# Similarly to :class:`Alignment` objects, we can visualize an
# Annotation in a *feature map*.
# In order to avoid overlapping features, we draw only the *CDS* feature.

# Get the range of the entire annotation via the *source* feature
# The loop does not stop at the first match: if there are several
# *source* features, the last one determines the range
# NOTE(review): 'loc_range' stays undefined if the annotation has no
# *source* feature
for feature in annotation:
    if feature.key == "source":
        # loc_range has exclusive stop
        # Only the first location of the feature is considered
        loc = list(feature.locs)[0]
        loc_range = (loc.first, loc.last + 1)
fig, ax = plt.subplots(figsize=(8.0, 1.0))
# Plot only the CDS features, all in a single line
graphics.plot_feature_map(
    ax,
    seq.Annotation([feature for feature in annotation
                    if feature.key == "CDS"]),
    multi_line=False,
    loc_range=loc_range,
    show_line_position=True)
fig.tight_layout()

########################################################################
# :class:`Annotation` objects can be indexed with slices that represent
# the start and the stop base/residue of the annotation from which the
# subannotation is created.
# All features that are not in this range are excluded from the
# subannotation.
# In order to demonstrate this indexing method, we create a
# subannotation that includes only features in range of the gene itself
# (without the regulatory stuff).
Пример #8
0
                y,
                dx,
                dy,
                self._tail_width * bbox.height,
                self._head_width * bbox.height,
                # Create head with 90 degrees tip
                # -> head width/length ratio = 1/2
                head_ratio=0.5,
                draw_head=draw_head,
                color=biotite.colors["orange"],
                linewidth=0))


# Test our drawing functions with example annotation
# The annotation contains one helix and one sheet feature; the
# 'sec_str_type' qualifier tells them apart
annotation = seq.Annotation([
    seq.Feature("SecStr", [seq.Location(10, 40)], {"sec_str_type": "helix"}),
    seq.Feature("SecStr", [seq.Location(60, 90)], {"sec_str_type": "sheet"}),
])

# Wide and flat figure, since everything is drawn in a single line
fig = plt.figure(figsize=(8.0, 0.8))
ax = fig.add_subplot(111)
graphics.plot_feature_map(
    ax,
    annotation,
    multi_line=False,
    loc_range=(1, 100),
    # Register our drawing functions
    feature_plotters=[HelixPlotter(), SheetPlotter()])
fig.tight_layout()

########################################################################
# Now let us do some serious application.
Пример #9
0
# Plot hydropathy
# The x values start at position '1 + ma_radius' and stop before
# 'len(hcn1) - ma_radius + 1'
# NOTE(review): presumably the moving average yields no value for the
# first and last 'ma_radius' residues -- confirm where 'hydropathies'
# is computed
ax.plot(np.arange(1 + ma_radius,
                  len(hcn1) - ma_radius + 1),
        hydropathies,
        color=biotite.colors["dimorange"])
# Horizontal reference line at a hydropathy of 0
ax.axhline(0, color="gray", linewidth=0.5)
ax.set_xlim(1, len(hcn1) + 1)
ax.set_xlabel("HCN1 sequence position")
ax.set_ylabel("Hydropathy (15 residues moving average)")

# Draw boxes for annotated transmembrane helices for comparison
# with hydropathy plot
annotation = gb.get_annotation(gp_file, include_only=["Region"])
# Keep only the regions that are named "Transmembrane region"
# NOTE(review): the lookup raises a KeyError for a 'Region' feature
# without a 'region_name' qualifier
transmembrane_annotation = seq.Annotation([
    feature for feature in annotation
    if feature.qual["region_name"] == "Transmembrane region"
])
for feature in transmembrane_annotation:
    first, last = feature.get_location_range()
    # Semi-transparent black box spanning the helix
    ax.axvspan(first, last, color=(0.0, 0.0, 0.0, 0.2), linewidth=0)

# Plot similarity score as measure for conservation
# A second y-axis is used that shares the x-axis with the hydropathy plot
ax2 = ax.twinx()
ax2.plot(np.arange(1 + ma_radius,
                   len(hcn1) - ma_radius + 1),
         scores,
         color=biotite.colors["brightorange"])
ax2.set_ylabel("Similarity score (15 residues moving average)")

ax.legend(handles=[
    Patch(color=biotite.colors["dimorange"], label="Hydropathy"),
Пример #10
0
            'start_aa': [start_aa],
            'end_aa': [aa],
            'sec_str_type': [previous_ss_seg]
        })
        ss_segments = ss_segments.append(ss_unit_entry)

    last_ss = ss

# At this point the df ss_segments also contains 'L' linkers
# Create new df to store only those relevant for plotting

ss_segments_plot = ss_segments.query('sec_str_type != "L"')

#%%

# Convert each remaining segment into a 'SecStr' feature
annotation = seq.Annotation()
# Each tuple consists of the row index (unused) followed by the
# three columns of the data frame
for _, start_aa, end_aa, ss_type in ss_segments_plot.itertuples():
    # Translate the one-letter codes into the qualifier values;
    # any other code is passed on unchanged
    if ss_type == "H":
        ss_type = "helix"
    elif ss_type == "S":
        ss_type = "sheet"

    feature = seq.Feature("SecStr", [seq.Location(start_aa, end_aa)],
                          {"sec_str_type": ss_type})
    annotation.add_feature(feature)

#%%


class HelixPlotter(graphics.FeaturePlotter):
    def __init__(self):
Пример #11
0
# Hand-written annotation of an example plasmid
annotation = seq.Annotation([
    # The complete plasmid
    seq.Feature(
        key="source",
        locs=[seq.Location(0, 1500)],
        qual={"organism": "Escherichia coli"},
    ),

    # Origin of replication, located on the reverse strand
    # NOTE(review): the qualifiers are the same as those of the
    # promoter below -- confirm that this is intended
    seq.Feature(
        key="rep_origin",
        locs=[seq.Location(600, 700, seq.Location.Strand.REVERSE)],
        qual={"regulatory_class": "promoter", "note": "MyProm"},
    ),

    # Promoter and the protein binding site within it
    seq.Feature(
        key="regulatory",
        locs=[seq.Location(1000, 1060)],
        qual={"regulatory_class": "promoter", "note": "MyProm"},
    ),
    seq.Feature(
        key="protein_bind",
        locs=[seq.Location(1025, 1045)],
        qual={"note": "repr"},
    ),

    # Gene A: ribosome binding site followed by the CDS
    seq.Feature(
        key="regulatory",
        locs=[seq.Location(1070, 1080)],
        qual={"regulatory_class": "ribosome_binding_site"},
    ),
    seq.Feature(
        key="CDS",
        locs=[seq.Location(1091, 1150)],
        qual={"product": "geneA"},
    ),

    # Gene B: two ribosome binding sites with one CDS each,
    # both CDS end at the same position
    seq.Feature(
        key="regulatory",
        locs=[seq.Location(1180, 1190)],
        qual={"regulatory_class": "ribosome_binding_site"},
    ),
    seq.Feature(
        key="CDS",
        locs=[seq.Location(1201, 1350)],
        qual={"product": "geneB"},
    ),
    seq.Feature(
        key="regulatory",
        locs=[seq.Location(1220, 1230)],
        qual={"regulatory_class": "ribosome_binding_site"},
    ),
    seq.Feature(
        key="CDS",
        locs=[seq.Location(1240, 1350)],
        qual={"product": "geneB2"},
    ),

    # Gene C
    seq.Feature(
        key="regulatory",
        locs=[seq.Location(1380, 1390)],
        qual={"regulatory_class": "ribosome_binding_site"},
    ),
    seq.Feature(
        key="CDS",
        # The CDS crosses the periodic boundary of the plasmid,
        # hence it consists of two locations
        locs=[seq.Location(1, 300), seq.Location(1402, 1500)],
        qual={"product": "geneC"},
    ),

    # Terminator
    seq.Feature(
        key="regulatory",
        locs=[seq.Location(310, 350)],
        qual={"regulatory_class": "terminator", "note": "MyTerm"},
    ),

    # Primers
    # Their labels are too long to fit on the map;
    # to display them anyway, set 'omit_oversized_labels' to False
    seq.Feature(
        key="primer_bind",
        locs=[seq.Location(1385, 1405)],
        qual={"note": "geneC"},
    ),
    seq.Feature(
        key="primer_bind",
        locs=[seq.Location(345, 365, seq.Location.Strand.REVERSE)],
        qual={"note": "geneC_R"},
    ),

    # Terminator
    # NOTE(review): identical to the terminator entry further up --
    # confirm whether this second entry is needed
    seq.Feature(
        key="regulatory",
        locs=[seq.Location(310, 350)],
        qual={"regulatory_class": "terminator", "note": "MyTerm"},
    ),
])
Пример #12
0
def ss_csv_to_annotation(csv_path: str):
    """
    Convert a per-residue secondary structure table into an annotation.

    The CSV file must have exactly two columns: the residue number and
    the secondary structure code of that residue.
    Consecutive residues with the same code are merged into one
    segment.
    Each segment becomes a ``SecStr`` feature, except for linker
    segments (code ``"L"``), which are omitted.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file.

    Returns
    -------
    annotation : Annotation
        One ``SecStr`` feature per non-linker segment.
        The ``sec_str_type`` qualifier is ``"helix"`` for code ``"H"``
        and ``"sheet"`` for code ``"S"``; any other code is passed on
        unchanged.
        The annotation is empty if the file contains no residues.
    """
    exported_ss = pd.read_csv(csv_path)
    annotation = seq.Annotation()
    if exported_ss.empty:
        return annotation

    # Collect the segments as plain (start, end, code) tuples;
    # 'DataFrame.append()' no longer exists in pandas >= 2.0 and
    # copied the entire frame for every added segment
    segments = []

    # The first segment starts at the first residue,
    # its end is not known yet
    start_aa, current_ss = exported_ss.iloc[0]
    # Residue number that closes the last segment
    seq_end_aa, _ = exported_ss.iloc[-1]

    for _, aa, ss in exported_ss.itertuples():
        # A segment is only recorded once the next segment begins...
        if ss != current_ss:
            segments.append((start_aa, aa - 1, current_ss))
            start_aa, current_ss = aa, ss
        # ...or when the end of the sequence is reached
        if aa == seq_end_aa:
            segments.append((start_aa, aa, current_ss))

    type_names = {"H": "helix", "S": "sheet"}
    for first, last, ss_type in segments:
        # Linkers are not relevant for plotting
        if ss_type == "L":
            continue
        feature = seq.Feature("SecStr", [seq.Location(first, last)],
                              {"sec_str_type": type_names.get(ss_type, ss_type)})
        annotation.add_feature(feature)

    return annotation