forked from argriffing/xgcode
/
20091004a.py
133 lines (119 loc) · 3.92 KB
/
20091004a.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""Look at the pseudoinverse of a node-weighted Laplacian-like matrix.
Look at the pseudoinverse of a Laplacian-like matrix
for non-uniform node weights.
This Laplacian-like matrix is the cross-product matrix S from the Abdi paper.
"""
from StringIO import StringIO
import random
import time
import argparse
import numpy as np
from SnippetUtil import HandlingError
import SnippetUtil
import Form
import FormOut
import NewickIO
import FelTree
import Euclid
import TreeSampler
def get_form():
    """
    Describe the user-adjustable inputs of this snippet.
    @return: a list of form objects
    """
    # The only input is the number of taxa: default 5, allowed range 3..20.
    return [
            Form.Integer('ntaxa', 'number of taxa', 5, low=3, high=20),
            ]
def get_form_out():
    """
    Describe the kind of output this snippet produces.
    @return: the object returned by FormOut.Report()
    """
    report_format = FormOut.Report()
    return report_format
def sample_branch_lengths(tree):
    """
    Assign a random branch length to every non-root node, in place.
    Each length is a whole number drawn from 1..999 and stored as a float.
    @param tree: a tree
    """
    for vertex in tree.preorder():
        # the root has no parent branch, so it gets no length
        if vertex.is_root():
            continue
        vertex.set_branch_length(float(random.randrange(1, 1000)))
def edm_to_S(D, m):
    """
    Convert squared distances to the mass-centered cross-product matrix.
    With E = I - 1 m^T the result is S = -1/2 E D E^T, which is the Gram
    matrix of the points after moving their mass-weighted barycenter
    to the origin.  S is symmetric and satisfies S m = 0.
    @param D: a matrix of squared euclidean distances
    @param m: a vector of masses
    @return: an S matrix that is like the pseudoinverse of Laplacian
    @raise ValueError: if D is not n x n for n masses, if a mass is
    negative, or if the masses do not sum to one
    """
    # accept nested lists as well as numpy arrays
    D = np.asarray(D, dtype=float)
    m = np.asarray(m, dtype=float)
    if m.ndim != 1:
        raise ValueError('D should be a square matrix conformant to m')
    n = len(m)
    if D.shape != (n, n):
        raise ValueError('D should be a square matrix conformant to m')
    if np.any(m < 0):
        raise ValueError('each element in m should be nonnegative')
    if not np.allclose(m.sum(), 1):
        raise ValueError('the masses should sum to one')
    # Every row of E is (unit vector - m), so E is not symmetric unless the
    # masses are uniform; the right-hand factor must therefore be E
    # transposed for the double centering to be correct.
    E = np.eye(n) - np.outer(np.ones(n), m)
    S = (-0.5) * np.dot(E, np.dot(D, E.T))
    return S
def process(ntaxa):
    """
    Sample a random weighted tree and report S and its pseudoinverse.
    The report shows the newick tree, the random mass vector m,
    the distance matrix D, the matrix S computed by edm_to_S,
    and the pseudoinverse of S.
    As a side effect the numpy print line width is set to 200.
    @param ntaxa: use this many taxa per tree
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    # sample an xtree topology
    xtree = TreeSampler.sample_agglomerated_tree(ntaxa)
    # convert the xtree to a FelTree, although I guess this might not be necessary
    tree_string = xtree.get_newick_string()
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # order the vertex ids with the leaves first
    ordered_ids = get_ordered_ids(tree)
    # sample random branch lengths
    sample_branch_lengths(tree)
    # get the weighted tree string
    weighted_tree_string = NewickIO.get_newick_string(tree)
    # get the distance matrix relating all vertices
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # create a mass vector that sums to one
    m = np.array([random.randrange(1, 10) for i in range(len(D))], dtype=float)
    m /= m.sum()
    # get the S matrix
    S = edm_to_S(D, m)
    # get the pseudoinverse of S
    S_pinv = np.linalg.pinv(S)
    # make the response: a header line followed by labeled sections,
    # each separated from the next by a blank line
    lines = ['newick tree: %s' % (weighted_tree_string,), '']
    labeled_values = (
            ('m:', m),
            ('D:', D),
            ('S:', S),
            ('pseudoinverse of S:', S_pinv),
            )
    for label, value in labeled_values:
        lines.extend([label, str(value), ''])
    return '\n'.join(lines).strip()
def get_response_content(fs):
    """
    Build the response text for a submitted form.
    @param fs: an object whose ntaxa attribute gives the number of taxa
    @return: the report from process() followed by a newline
    """
    report = process(fs.ntaxa)
    return report + '\n'
def get_ordered_ids(tree):
    """
    List the id of every vertex, with the tips before the internal nodes.
    Note from the original author: postorder might work here instead.
    @param tree: a tree
    @return: a list of ids beginning with the leaves
    """
    tip_ids = [id(tip) for tip in tree.gen_tips()]
    internal_ids = [id(vertex) for vertex in tree.gen_internal_nodes()]
    return tip_ids + internal_ids
def main(args):
    """
    Print the report for the requested number of taxa.
    @param args: parsed command line arguments with an ntaxa attribute
    """
    # a single parenthesized argument prints the same text
    # whether print is a statement or a function
    print(process(args.ntaxa))
if __name__ == '__main__':
    # the program description is derived from the module docstring
    parser = argparse.ArgumentParser(
            description=SnippetUtil.docstring_to_title(__doc__))
    parser.add_argument(
            '--ntaxa',
            type=int,
            default=5,
            help='number of taxa in the tree')
    args = parser.parse_args()
    main(args)