Пример #1
0
# Now a function must be defined that converts the similarity given
# by a substitution matrix into the distance required by the UPGMA method.
# In this case, the distance is defined as the difference between the
# average maximum similarity of the two symbols to themselves and the
# similarity of the two symbols to each other.
#
# Finally, the obtained (phylogenetic) tree is plotted as a dendrogram.
def get_distance(similarities, i, j):
    """
    Convert the similarity of the symbols *i* and *j* into a distance.

    The distance is the mean of the two self-similarities
    (``similarities[i, i]`` and ``similarities[j, j]``) minus the
    similarity between both symbols (``similarities[i, j]``).

    Parameters
    ----------
    similarities
        Object that is indexed with an ``[row, column]`` pair.
    i, j
        Indices of the two symbols to compare.

    Returns
    -------
    The mean self-similarity minus the mutual similarity.
    """
    self_similarity_i = similarities[i, i]
    self_similarity_j = similarities[j, j]
    mean_self_similarity = (self_similarity_i + self_similarity_j) / 2
    mutual_similarity = similarities[i, j]
    return mean_self_similarity - mutual_similarity


# Build the pairwise distance matrix: it has the same shape as the
# similarity matrix and every entry (i, j) is filled via get_distance()
distances = np.zeros(similarities.shape)
for i in range(distances.shape[0]):
    for j in range(distances.shape[1]):
        distances[i, j] = get_distance(similarities, i, j)

# Create the tree from the distance matrix using the UPGMA method
tree = phylo.upgma(distances)

fig = plt.figure(figsize=(8.0, 5.0))
ax = fig.add_subplot(111)
# Use the 3-letter amino acid code as label
labels = [
    seq.ProteinSequence.convert_letter_1to3(letter).capitalize()
    for letter in matrix.get_alphabet1()
]
graphics.plot_dendrogram(ax, tree, orientation="top", labels=labels)
ax.set_ylabel("Distance")
# Add grid for clearer distance perception
ax.yaxis.grid(color="lightgray")
plt.show()
Пример #2
0
# Create tree from root node
tree = phylo.Tree(root=root)
# Trees can be converted into Newick notation
print("Tree:", tree.to_newick(labels=fruits))
# Distances can be omitted
print("Tree w/o distances:",
      tree.to_newick(labels=fruits, include_distance=False))
# Distances can be measured:
# the two entries are addressed by their index in `fruits`
distance = tree.get_distance(fruits.index("Apple"), fruits.index("Banana"))
print("Distance Apple-Banana:", distance)

########################################################################
# You can also plot a tree as a dendrogram.

fig, ax = plt.subplots(figsize=(6.0, 6.0))
graphics.plot_dendrogram(ax, tree, labels=fruits)
fig.tight_layout()

########################################################################
# From distances to trees
# ^^^^^^^^^^^^^^^^^^^^^^^
#
# When you want to create a :class:`Tree` from distances obtained, for
# example, from sequence alignments, you can use the UPGMA algorithm
# implemented in the function of the same name :func:`upgma()`.

# Symmetric 5x5 pairwise distance matrix with zeros on the diagonal
distances = np.array([[0, 17, 21, 31, 23], [17, 0, 30, 34, 21],
                      [21, 30, 0, 28, 39], [31, 34, 28, 0, 43],
                      [23, 21, 39, 43, 0]])
tree = phylo.upgma(distances)
fig, ax = plt.subplots(figsize=(6.0, 3.0))
Пример #3
0
def _show_tree(tree):
    """
    Plot the given tree as a dendrogram on a new figure and display it.

    The plotting packages are imported inside this function.

    Parameters
    ----------
    tree
        The tree that is handed to ``plot_dendrogram()``.
    """
    import matplotlib.pyplot as plt
    import biotite.sequence.graphics as graphics

    # Only the axes are required, the figure handle itself is not used
    _, axes = plt.subplots()
    graphics.plot_dendrogram(axes, tree)
    plt.show()
Пример #4
0
# NOTE(review): presumably join() blocks until the run has finished,
# so that the alignment is available afterwards - confirm
app.join()
alignment = app.get_alignment()
print(alignment)

########################################################################
# In most MSA software even more information than the mere alignment can
# be extracted.
# For instance, the guide tree that was used for the alignment can be
# obtained from the MUSCLE output.

import matplotlib.pyplot as plt
import biotite.sequence.graphics as graphics

tree = app.get_guide_tree()
fig, ax = plt.subplots()
# Use the string representation of each sequence as label
graphics.plot_dendrogram(
    ax, tree, labels=[str(sequence) for sequence in [seq1, seq2, seq3, seq4]])
ax.set_xlabel("Distance")
fig.tight_layout()

########################################################################
# For the lazy people there is also a convenience method
# that handles the :class:`Application` execution internally.
# However, this shortcut returns only the :class:`Alignment`.

alignment = muscle.MuscleApp.align([seq1, seq2, seq3, seq4])

########################################################################
# The alternatives to MUSCLE are Clustal-Omega and MAFFT.
# To use them, simply replace :class:`MuscleApp` with
# :class:`ClustalOmegaApp` or :class:`MafftApp` and you are done.
Пример #5
0
    for name, seq_str in zip(UNIPROT_IDS.keys(), fasta_file.values())
}

### create a simple phylogenetic tree
# create MSA
alignment = clustalo.ClustalOmegaApp.align(list(sequences.values()))
# build simple tree based on deviation from sequence identity:
# the distance is 1 minus the pairwise sequence identity
distances = 1 - align.get_pairwise_sequence_identity(alignment,
                                                     mode="shortest")
tree = phylo.upgma(distances)

### plot the tree
fig, ax = plt.subplots(1, 1, figsize=(8, 5))
graphics.plot_dendrogram(ax,
                         tree,
                         orientation="left",
                         labels=list(UNIPROT_IDS.keys()),
                         show_distance=False,
                         linewidth=2)
# turn off the grid and remove the x-axis ticks
ax.grid(False)
ax.set_xticks([])

# distance indicator
# extent of the indicator along the x-direction
indicator_len = 0.1
# (x, y) start point: x is the lower x-limit plus 2 % of the upper
# x-limit, y lies 15 % of the upper y-limit below the upper y-limit
indicator_start = (ax.get_xlim()[0] + ax.get_xlim()[1] * 0.02,
                   ax.get_ylim()[1] - ax.get_ylim()[1] * 0.15)
# (x, y) end point: same y as the start, shifted by indicator_len in x
indicator_stop = (indicator_start[0] + indicator_len, indicator_start[1])
# (x, y) point at the x-midpoint of the indicator, offset by 0.25 in y
indicator_center = ((indicator_start[0] + indicator_stop[0]) / 2,
                    (indicator_start[1] + 0.25))
ax.annotate("",
            xy=indicator_start,
            xytext=indicator_stop,