def test_make_distance_based_exclusion_fn(self):
    """make_distance_based_exclusion_fn should return a working function"""
    # NOTE(review): a method with this exact name appears twice in this
    # file; if both live in the same class the later definition shadows
    # this one -- confirm the duplication is intentional.
    exclude_similar_strains = make_distance_based_exclusion_fn(0.03)

    # The generated function should carry an informative docstring.
    self.assertEqual(
        "Exclude neighbors of tip within 0.030000 branch length units",
        exclude_similar_strains.__doc__)

    # Excluding neighbors of tip C on the simple tree prunes away C's
    # close relatives, leaving only A and B under the root.
    test_tree = self.SimpleTree.deepcopy()
    tip = test_tree.getNodeMatchingName("C")
    pruned = exclude_similar_strains(tip, test_tree)
    self.assertEqual(pruned.getNewick(with_distances=True),
                     "(A:0.02,B:0.01)root;")

    # When only a single tip of a clade survives the exclusion, it is
    # carried on the combined branch length (C: 0.06 + 0.05 = 0.11).
    test_tree = DndParser(
        "((A:0.02,B:0.01)E:0.05,(C:0.06,D:0.01)F:0.05)root;")
    tip = test_tree.getNodeMatchingName("D")
    pruned = exclude_similar_strains(tip, test_tree)
    self.assertEqual(pruned.getNewick(with_distances=True),
                     "((A:0.02,B:0.01)E:0.05,C:0.11)root;")

    # An exclusion distance that is too large raises a ValueError.
    test_tree = self.SimpleTree.deepcopy()
    test_fn = make_distance_based_exclusion_fn(300.0)
    tip = test_tree.getNodeMatchingName("C")
    self.assertRaises(ValueError, test_fn, tip, test_tree)
def test_get_nearest_named_ancestor(self):
    """correctly get the nearest named ancestor"""
    # In the first tree the root carries a name, so s3's nearest named
    # ancestor is the root node itself.
    named_root_tree = DndParser("(((s1,s2)g1,s3))root;")
    s3 = named_root_tree.getNodeMatchingName('s3')
    self.assertEqual(get_nearest_named_ancestor(s3), named_root_tree)

    # In the second tree none of s3's ancestors is named (g1 is an
    # ancestor of s1/s2 only), so the lookup returns None.
    unnamed_root_tree = DndParser("(((s1,s2)g1,s3));")
    s3 = unnamed_root_tree.getNodeMatchingName('s3')
    self.assertEqual(get_nearest_named_ancestor(s3), None)
def test_make_distance_based_exclusion_fn(self):
    """make_distance_based_exclusion_fn should return a working function"""
    # NOTE(review): a method with this exact name is defined twice in
    # this file; if both are in the same class this later definition
    # shadows the earlier one -- confirm the duplication is intentional.
    exclude_similar_strains = make_distance_based_exclusion_fn(0.03)
    # Test that new function is documented
    exp_doc = 'Exclude neighbors of tip within 0.030000 branch length units'
    self.assertEqual(exp_doc, exclude_similar_strains.__doc__)
    # Test that the function works: excluding neighbors of tip C within
    # 0.03 units of the simple tree should leave only A and B.
    test_tree = self.SimpleTree.deepcopy()
    # print test_tree.getNewick(with_distances=True)
    tip = test_tree.getNodeMatchingName('C')
    obs = exclude_similar_strains(tip, test_tree).getNewick(with_distances=True)
    exp = "(A:0.02,B:0.01)root;"
    self.assertEqual(obs, exp)
    # Test on a tree where a single node will remain; the surviving tip C
    # ends up on the combined branch length (0.06 + 0.05 = 0.11).
    test_tree = \
        DndParser("((A:0.02,B:0.01)E:0.05,(C:0.06,D:0.01)F:0.05)root;")
    # print test_tree.getNewick(with_distances=True)
    tip = test_tree.getNodeMatchingName('D')
    obs = exclude_similar_strains(tip, test_tree).getNewick(with_distances=True)
    exp = "((A:0.02,B:0.01)E:0.05,C:0.11)root;"
    self.assertEqual(obs, exp)
    # Test that we raise a ValueError if the exclusion distance is too
    # large.
    test_tree = self.SimpleTree.deepcopy()
    test_fn = make_distance_based_exclusion_fn(300.0)
    tip = test_tree.getNodeMatchingName('C')
    self.assertRaises(ValueError, test_fn, tip, test_tree)
def test_unifrac_make_subtree(self):
    """unifrac result should not depend on make_subtree

    environment M contains only tips not in tree, tip j, k is in no envs
    one clade is missing entirely
    values were calculated by hand
    we also test that we still have a valid tree at the end
    """
    # NOTE(review): an identical copy of this test appears twice in this
    # file; if both live in the same class the later definition shadows
    # the earlier one -- confirm the duplication is intentional.
    # NOTE(review): the fractional expectations (10/16, 8/13, 8/17)
    # require true division; under Python 2 this module must have
    # `from __future__ import division` at the top -- confirm.
    t1 = DndParser('((a:1,b:2):4,((c:3, (j:1,k:2)mt:17),(d:1,e:1):2):3)',
                   UniFracTreeNode)  # note c,j is len 0 node
    # Rough shape of t1:
    #           /-------- /-a
    #  ---------|         \-b
    #           |          /-------- /-c
    #           \---------|          \mt------ /-j
    #                     |                    \-k
    #                     \-------- /-d
    #                               \-e
    # Tip abundances per environment: tips j and k appear in no
    # environment, and environment M references only tip m, which is
    # not present in the tree at all.
    env_str = """
a A 1
a C 2
b A 1
b B 1
c B 1
d B 3
e C 1
m M 88"""
    env_counts = count_envs(env_str.splitlines())
    # Hand-calculated pairwise distances; the result must be identical
    # whether or not the tree is first reduced to the relevant subtree.
    self.assertFloatEqual(
        fast_unifrac(t1, env_counts, make_subtree=False)['distance_matrix'],
        (array([[0, 10/16, 8/13],
                [10/16, 0, 8/17],
                [8/13, 8/17, 0]]), ['A', 'B', 'C']))
    self.assertFloatEqual(
        fast_unifrac(t1, env_counts, make_subtree=True)['distance_matrix'],
        (array([[0, 10/16, 8/13],
                [10/16, 0, 8/17],
                [8/13, 8/17, 0]]), ['A', 'B', 'C']))
    # changing tree topology relative to c,j tips shouldn't change anything
    t2 = DndParser('((a:1,b:2):4,((c:2, (j:1,k:2)mt:17):1,(d:1,e:1):2):3)',
                   UniFracTreeNode)
    self.assertFloatEqual(
        fast_unifrac(t2, env_counts, make_subtree=False)['distance_matrix'],
        (array([[0, 10/16, 8/13],
                [10/16, 0, 8/17],
                [8/13, 8/17, 0]]), ['A', 'B', 'C']))
    self.assertFloatEqual(
        fast_unifrac(t2, env_counts, make_subtree=True)['distance_matrix'],
        (array([[0, 10/16, 8/13],
                [10/16, 0, 8/17],
                [8/13, 8/17, 0]]), ['A', 'B', 'C']))
    # ensure we haven't meaningfully changed the tree by passing it to
    # unifrac: rebuild t1 from scratch as t3, then compare sorted tip
    # names and a representative tip-to-tip distance.
    t3 = DndParser('((a:1,b:2):4,((c:3, (j:1,k:2)mt:17),(d:1,e:1):2):3)',
                   UniFracTreeNode)  # note c,j is len 0 node
    t1_tips = [tip.Name for tip in t1.tips()]
    t1_tips.sort()
    t3_tips = [tip.Name for tip in t3.tips()]
    t3_tips.sort()
    self.assertEqual(t1_tips, t3_tips)
    tipj3 = t3.getNodeMatchingName('j')
    tipb3 = t3.getNodeMatchingName('b')
    tipj1 = t1.getNodeMatchingName('j')
    tipb1 = t1.getNodeMatchingName('b')
    self.assertFloatEqual(tipj1.distance(tipb1), tipj3.distance(tipb3))
def test_unifrac_make_subtree(self):
    """unifrac result should not depend on make_subtree

    environment M contains only tips not in tree, tip j, k is in no envs
    one clade is missing entirely
    values were calculated by hand
    we also test that we still have a valid tree at the end
    """
    # NOTE(review): this test is a verbatim duplicate of an earlier
    # method with the same name; if both live in the same class this
    # definition shadows the earlier one -- confirm it is intentional.
    # NOTE(review): the fractional expectations (10/16, 8/13, 8/17)
    # require true division; under Python 2 this module must have
    # `from __future__ import division` at the top -- confirm.
    t1 = DndParser('((a:1,b:2):4,((c:3, (j:1,k:2)mt:17),(d:1,e:1):2):3)',
                   UniFracTreeNode)  # note c,j is len 0 node
    # Rough shape of t1:
    #           /-------- /-a
    #  ---------|         \-b
    #           |          /-------- /-c
    #           \---------|          \mt------ /-j
    #                     |                    \-k
    #                     \-------- /-d
    #                               \-e
    # Tip abundances per environment: tips j and k appear in no
    # environment, and environment M references only tip m, which is
    # not present in the tree at all.
    env_str = """
a A 1
a C 2
b A 1
b B 1
c B 1
d B 3
e C 1
m M 88"""
    env_counts = count_envs(env_str.splitlines())
    # Hand-calculated pairwise distances; the result must be identical
    # whether or not the tree is first reduced to the relevant subtree.
    self.assertFloatEqual(
        fast_unifrac(t1, env_counts, make_subtree=False)['distance_matrix'],
        (array([[0, 10/16, 8/13],
                [10/16, 0, 8/17],
                [8/13, 8/17, 0]]), ['A', 'B', 'C']))
    self.assertFloatEqual(
        fast_unifrac(t1, env_counts, make_subtree=True)['distance_matrix'],
        (array([[0, 10/16, 8/13],
                [10/16, 0, 8/17],
                [8/13, 8/17, 0]]), ['A', 'B', 'C']))
    # changing tree topology relative to c,j tips shouldn't change anything
    t2 = DndParser('((a:1,b:2):4,((c:2, (j:1,k:2)mt:17):1,(d:1,e:1):2):3)',
                   UniFracTreeNode)
    self.assertFloatEqual(
        fast_unifrac(t2, env_counts, make_subtree=False)['distance_matrix'],
        (array([[0, 10/16, 8/13],
                [10/16, 0, 8/17],
                [8/13, 8/17, 0]]), ['A', 'B', 'C']))
    self.assertFloatEqual(
        fast_unifrac(t2, env_counts, make_subtree=True)['distance_matrix'],
        (array([[0, 10/16, 8/13],
                [10/16, 0, 8/17],
                [8/13, 8/17, 0]]), ['A', 'B', 'C']))
    # ensure we haven't meaningfully changed the tree by passing it to
    # unifrac: rebuild t1 from scratch as t3, then compare sorted tip
    # names and a representative tip-to-tip distance.
    t3 = DndParser('((a:1,b:2):4,((c:3, (j:1,k:2)mt:17),(d:1,e:1):2):3)',
                   UniFracTreeNode)  # note c,j is len 0 node
    t1_tips = [tip.Name for tip in t1.tips()]
    t1_tips.sort()
    t3_tips = [tip.Name for tip in t3.tips()]
    t3_tips.sort()
    self.assertEqual(t1_tips, t3_tips)
    tipj3 = t3.getNodeMatchingName('j')
    tipb3 = t3.getNodeMatchingName('b')
    tipj1 = t1.getNodeMatchingName('j')
    tipb1 = t1.getNodeMatchingName('b')
    self.assertFloatEqual(tipj1.distance(tipb1), tipj3.distance(tipb3))
class TestPredictTraits(TestCase): """Tests of predict_traits.py""" def setUp(self): self.SimpleTree = \ DndParser("((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;") #Set up a tree with obvious differences in the rate of gene content #evolution to test confidence interval estimation #Features: # --trait 1 is has ~ 10 fold higher confidence intervals than trait 0. # Trait 2 is 10 fold higher than trait 1 # -- of predicted nodes B and D, D has a ~10 fold longer branch self.SimpleUnequalVarianceTree =\ DndParser("((A:0.01,B:0.01)E:0.05,(C:0.01,D:0.10)F:0.05)root;") traits = {"A":[1.0,1.0,1.0],"C":[1.0,1.0,1.0],"E":[1.0,1.0,1.0],"F":[1.0,1.0,1.0]} self.SimpleUnequalVarianceTree = assign_traits_to_tree(traits,\ self.SimpleUnequalVarianceTree,trait_label="Reconstruction") self.SimpleUnequalVarianceTree.getNodeMatchingName('E').upper_bound = [2.0,20.0,200.0] self.SimpleUnequalVarianceTree.getNodeMatchingName('E').lower_bound = [-1.0,-19.0,-199.0] self.SimpleUnequalVarianceTree.getNodeMatchingName('F').upper_bound = [2.0,20.0,200.0] self.SimpleUnequalVarianceTree.getNodeMatchingName('F').lower_bound = [-1.0,-19.0,-199.0] #Set up a tree with a three-way polytomy self.SimplePolytomyTree = \ DndParser("((A:0.02,B:0.01,B_prime:0.03)E:0.05,(C:0.01,D:0.01)F:0.05)root;") self.SimpleTreeTraits =\ {"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]} self.PartialReconstructionTree =\ DndParser("((((B:0.01,C:0.01)I3:0.01,A:0.01)I2:0.01,D:0.01)I1:0.01)root;") self.CloseToI3Tree =\ DndParser("((((B:0.01,C:0.95)I3:0.01,A:0.01)I2:0.95,D:0.05)I1:0.95)root;") self.CloseToI1Tree =\ DndParser("((((B:0.95,C:0.95)I3:0.95,A:0.01)I2:0.02,D:0.05)I1:0.05)root;") self.BetweenI3AndI1Tree=\ DndParser("((((B:0.01,C:0.1)I3:0.02,A:0.01)I2:0.02,D:0.05)I1:0.02)root;") self.PartialReconstructionTraits =\ {"B":[1.0,1.0],"C":[1.0,1.0],"I3":[1.0,1.0],"I1":[0.0,1.0],"D":[0.0,1.0]} self.GeneCountTraits =\ {"B":[1.0,1.0],"C":[1.0,2.0],"I3":[1.0,1.0],"I1":[0.0,3.0],"D":[0.0,5.0]} #create a tmp trait file 
self.in_trait1_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv') self.in_trait1_file=open(self.in_trait1_fp,'w') self.in_trait1_file.write(in_trait1) self.in_trait1_file.close() #create another tmp trait file (with columns in different order) self.in_trait2_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv') self.in_trait2_file=open(self.in_trait2_fp,'w') self.in_trait2_file.write(in_trait2) self.in_trait2_file.close() #create a tmp trait file with a incorrect trait name self.in_bad_trait_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv') self.in_bad_trait_file=open(self.in_bad_trait_fp,'w') self.in_bad_trait_file.write(in_bad_trait) self.in_bad_trait_file.close() self.files_to_remove = [self.in_trait1_fp,self.in_trait2_fp,self.in_bad_trait_fp] def tearDown(self): remove_files(self.files_to_remove) def test_nearest_neighbor_prediction(self): """nearest_neighbor_prediction predicts nearest neighbor's traits""" traits = self.SimpleTreeTraits tree = self.SimpleTree result_tree = assign_traits_to_tree(traits,tree,trait_label="Reconstruction") #Test with default options results = predict_nearest_neighbor(tree, nodes_to_predict =["B","C"]) self.assertEqual(results["B"],array([1.0,1.0])) self.assertEqual(results["C"],array([0.0,0.0])) #Test allowing ancestral NNs results = predict_nearest_neighbor(tree, nodes_to_predict =["B","C"],\ tips_only = False) self.assertEqual(results["C"],array([0.0,1.0])) #Test allowing self to be NN AND Ancestral NNs results = predict_nearest_neighbor(tree, nodes_to_predict =["A","B","C","D"],\ tips_only = False,use_self_in_prediction=True) self.assertEqual(results["A"],array([1.0,1.0])) self.assertEqual(results["B"],array([1.0,1.0])) self.assertEqual(results["C"],array([0.0,1.0])) self.assertEqual(results["D"],array([0.0,0.0])) def test_calc_nearest_sequenced_taxon_index(self): """calc_nearest_sequenced_taxon_index calculates the NSTI measure""" traits = self.SimpleTreeTraits tree = 
self.SimpleTree result_tree = assign_traits_to_tree(traits,tree,trait_label="Reconstruction") #Expected distances: # A --> A 0.0 # B --> A 0.03 # C --> D 0.02 # D --> D 0.0 # = 0.05/4.0 = 0.0125 exp = 0.0125 verbose = False #Test with default options obs_nsti,obs_distances = calc_nearest_sequenced_taxon_index(tree,verbose=verbose) self.assertFloatEqual(obs_nsti,exp) self.assertFloatEqual(obs_distances["A"],0.0) self.assertFloatEqual(obs_distances["B"],0.03) self.assertFloatEqual(obs_distances["C"],0.02) self.assertFloatEqual(obs_distances["D"],0.00) #Test calcing the index while #limiting prediction to B and C # B --> A 0.03 # C --> D 0.02 exp = 0.025 obs_nsti,obs_distances = calc_nearest_sequenced_taxon_index(tree,\ limit_to_tips = ["B","C"],verbose=False) self.assertFloatEqual(obs_nsti,exp) self.assertFloatEqual(obs_distances["B"],0.03) self.assertFloatEqual(obs_distances["C"],0.02) def test_get_nn_by_tree_descent(self): """calc_nearest_sequenced_taxon_index calculates the NSTI measure""" traits = self.SimpleTreeTraits tree = self.SimpleTree result_tree = assign_traits_to_tree(traits,tree,trait_label="Reconstruction") #Expected distances: # A --> A 0.0 # B --> A 0.03 # C --> D 0.02 # D --> D 0.0 # = 0.05/4.0 = 0.0125 exp = 0.0125 #Test with default options nn,distance = get_nn_by_tree_descent(tree,"B",verbose=True) self.assertEqual(nn.Name,"A") self.assertFloatEqual(distance,0.03) nn,distance = get_nn_by_tree_descent(tree,"A",verbose=True) self.assertEqual(nn.Name,"A") self.assertFloatEqual(distance,0.00) nn,distance = get_nn_by_tree_descent(tree,"A",filter_by_property=False,verbose=True) self.assertEqual(nn.Name,"B") self.assertFloatEqual(distance,0.03) nn,distance = get_nn_by_tree_descent(tree,"C",verbose=True) self.assertEqual(nn.Name,"D") self.assertFloatEqual(distance,0.02) #self.assertFloatEqual(obs_distances["A"],0.0) #self.assertFloatEqual(obs_distances["B"],0.03) #self.assertFloatEqual(obs_distances["C"],0.02) 
#self.assertFloatEqual(obs_distances["D"],0.00) #Test calcing the index while #limiting prediction to B and C # B --> A 0.03 # C --> D 0.02 exp = 0.025 obs_nsti,obs_distances = calc_nearest_sequenced_taxon_index(tree,\ limit_to_tips = ["B","C"],verbose=False) self.assertFloatEqual(obs_nsti,exp) self.assertFloatEqual(obs_distances["B"],0.03) self.assertFloatEqual(obs_distances["C"],0.02) def test_predict_random_neighbor(self): """predict_random_neighbor predicts randomly""" traits = self.SimpleTreeTraits tree = self.SimpleTree result_tree = assign_traits_to_tree(traits,tree) #If there is only one other valid result, this #should always be predicted #self.SimpleTreeTraits =\ # {"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]} #If self predictions are disallowed, then the prediction for A should #always come from node D, and be 0,0. results = predict_random_neighbor(tree,['A'],\ trait_label = "Reconstruction",\ use_self_in_prediction=False) self.assertEqual(results['A'],[0.0,0.0]) #If use_self is True, ~50% of predictions should be [1.0,1.0] and # half should be [0.0,0.0] #Pick repeatedly and make sure frequencies are #reasonable. The technique is fast, so #many iterations are reasonable. 
iterations = 100000 a_predictions = 0 d_predictions = 0 for i in range(iterations): results = predict_random_neighbor(tree,['A'],\ trait_label = "Reconstruction",\ use_self_in_prediction=True) #print results if results['A'] == [1.0,1.0]: #print "A pred" a_predictions += 1 elif results['A'] == [0.0,0.0]: #print "D pred" d_predictions +=1 else: raise RuntimeError(\ "Bad prediction result: Neither node A nor node D traits used in prediction") #print "All a predictions:",a_predictions #print "All d predictions:",d_predictions ratio = float(a_predictions)/float(iterations) #print "Ratio:", ratio self.assertFloatEqual(ratio,0.5,eps=1e-2) def test_get_nearest_annotated_neightbor(self): """get_nearest_annotated_neighbor finds nearest relative with traits""" traits = self.SimpleTreeTraits tree = self.SimpleTree result_tree = assign_traits_to_tree(traits,tree) #Test ancestral NN matching nn = get_nearest_annotated_neighbor(tree,'A',\ tips_only=False, include_self=False) self.assertEqual(nn.Name,'E') nn = get_nearest_annotated_neighbor(tree,'B',\ tips_only=False, include_self=False) self.assertEqual(nn.Name,'E') nn = get_nearest_annotated_neighbor(tree,'C',\ tips_only=False, include_self=False) self.assertEqual(nn.Name,'F') nn = get_nearest_annotated_neighbor(tree,'D',\ tips_only=False, include_self=False) self.assertEqual(nn.Name,'F') #Test tip only, non-self matching nn = get_nearest_annotated_neighbor(tree,'A',\ tips_only=True, include_self=False) self.assertEqual(nn.Name,'D') nn = get_nearest_annotated_neighbor(tree,'B',\ tips_only=True, include_self=False) self.assertEqual(nn.Name,'A') nn = get_nearest_annotated_neighbor(tree,'C',\ tips_only=True, include_self=False) self.assertEqual(nn.Name,'D') nn = get_nearest_annotated_neighbor(tree,'D',\ tips_only=True, include_self=False) self.assertEqual(nn.Name,'A') def test_biom_table_from_predictions(self): """format predictions into biom format""" traits = self.SimpleTreeTraits tree = self.SimpleTree #print "Starting 
tree:",tree.asciiArt() # Test on simple tree result_tree = assign_traits_to_tree(traits,tree) nodes_to_predict = [n.Name for n in result_tree.tips()] #print "Predicting nodes:", nodes_to_predict predictions = predict_traits_from_ancestors(result_tree,\ nodes_to_predict) biom_table=biom_table_from_predictions(predictions,["trait1","trait2"]) def test_equal_weight(self): """constant_weight weights by a constant""" w = 1.0 d = 0.1 for i in range(100): obs = equal_weight(i) exp = w self.assertFloatEqual(obs,exp) def test_make_neg_exponential_weight_fn(self): """make_neg_exponential_weight_fn returns the specified fn""" exp_base = 10 weight_fn = make_neg_exponential_weight_fn(exp_base) d = 10.0 obs = weight_fn(d) exp = 10.0**-10.0 self.assertFloatEqual(obs,exp) #Test for base two exp_base = 2 weight_fn = make_neg_exponential_weight_fn(exp_base) d = 16.0 obs = weight_fn(d) exp = 2.0**-16.0 self.assertFloatEqual(obs,exp) def test_linear_weight(self): """linear_weight weights linearly""" max_d = 1.0 d = 0.90 obs = linear_weight(d,max_d) exp = 0.10 self.assertFloatEqual(obs, exp) d = 0.0 obs = linear_weight(d,max_d) exp = 1.0 self.assertFloatEqual(obs, exp) max_d = 3.0 d = 1.5 obs = linear_weight(d,max_d) exp = 0.50 self.assertFloatEqual(obs, exp) def test_inverse_variance_weight(self): """inverse_variance_weight""" #TODO: test this works with arrays of variances var = 1000.0 for d in range(1,10): d = float(d) obs = inverse_variance_weight(d,var) exp = 1.0/1000.0 self.assertFloatEqual(obs,exp) #Now test the special case of zero variance var = 0.0 for d in range(1,10): d = float(d) obs = inverse_variance_weight(d,var) exp = 1.0/1e-10 self.assertFloatEqual(obs,exp) def test_assign_traits_to_tree(self): """assign_traits_to_tree should map reconstructed traits to tree nodes""" # Test that the function assigns traits from a dict to a tree node traits = self.SimpleTreeTraits tree = self.SimpleTree # Test on simple tree result_tree = assign_traits_to_tree(traits,tree) # Test that 
each node is assigned correctly for node in result_tree.preorder(): obs = node.Reconstruction exp = traits.get(node.Name, None) self.assertEqual(obs,exp) # Test on polytomy tree tree = self.SimplePolytomyTree result_tree = assign_traits_to_tree(traits,tree) # Test that each node is assigned correctly for node in result_tree.preorder(): obs = node.Reconstruction exp = traits.get(node.Name, None) self.assertEqual(obs,exp) def test_assign_traits_to_tree_quoted_node_name(self): """Assign_traits_to_tree should remove quotes from node names""" # Test that the function assigns traits from a dict to a tree node traits = self.SimpleTreeTraits tree = self.SimpleTree #Make one node quoted tree.getNodeMatchingName('A').Name="'A'" tree.getNodeMatchingName('B').Name='"B"' # Test on simple tree result_tree = assign_traits_to_tree(traits,tree,fix_bad_labels=True) #Setting fix_bad_labels to false produces NoneType predictions when #labels are quoted # Test that each node is assigned correctly for node in result_tree.preorder(): obs = node.Reconstruction exp = traits.get(node.Name.strip("'").strip('"'), None) self.assertEqual(obs,exp) # Test on polytomy tree tree = self.SimplePolytomyTree result_tree = assign_traits_to_tree(traits,tree) # Test that each node is assigned correctly for node in result_tree.preorder(): obs = node.Reconstruction exp = traits.get(node.Name, None) self.assertEqual(obs,exp) def test_update_trait_dict_from_file(self): """update_trait_dict_from_file should parse input trait tables (asr and genome) and match traits between them""" header,traits=update_trait_dict_from_file(self.in_trait1_fp) self.assertEqual(header,["trait2","trait1"]) self.assertEqual(traits,{3:[3,1],'A':[5,2.5],'D':[5,2]}) #test that we get a warning when header from other trait table doesn't match perfectly. 
with catch_warnings(record=True) as w: header2,traits2=update_trait_dict_from_file(self.in_trait2_fp,header) self.assertEqual(header2,["trait2","trait1"]) self.assertEqual(traits2,{1:[3,1], 2:[3,0], 3:[3,2]}) assert len(w) == 1 assert issubclass(w[-1].category, UserWarning) assert "Missing" in str(w[-1].message) #try giving a trait table with a trait that doesn't match our header self.assertRaises(RuntimeError,update_trait_dict_from_file,self.in_bad_trait_fp,header) def test_predict_traits_from_ancestors(self): """predict_traits_from_ancestors should propagate ancestral states""" # Testing the point predictions first (since these are easiest) # When the node is very close to I3, prediction should be approx. I3 traits = self.PartialReconstructionTraits tree = assign_traits_to_tree(traits,self.CloseToI3Tree) nodes_to_predict = ['A'] prediction = predict_traits_from_ancestors(tree=tree,\ nodes_to_predict=nodes_to_predict) exp = traits["I3"] #print "PREDICTION:",prediction for node in nodes_to_predict: self.assertFloatEqual(around(prediction[node]),exp) #TODO: need to add test case where a very hard to predict # single value is present in a sequenced genome. 
Then # test that use_self_in_prediction controls whether this is used def test_predict_traits_from_ancestors_correctly_predicts_variance(self): """predict_traits_from_ancestors should correctly report variance due to branch lengths and rates of gene copy number evolution """ tree = self.SimpleUnequalVarianceTree #All values are 1, but variance in the prediction should vary #due to vary unequal branch lengths (between taxa) and brownian #motion parameters (between traits) nodes_to_predict = ['B','D'] bm_fixed_10_fold = [1.0,10.0,100.0] prediction,variances,confidence_intervals = predict_traits_from_ancestors(tree=tree,\ nodes_to_predict=nodes_to_predict,calc_confidence_intervals=True,\ lower_bound_trait_label='lower_bound',upper_bound_trait_label='upper_bound', brownian_motion_parameter = bm_fixed_10_fold,trait_label="Reconstruction") #All traits are 1, so all predictions should be 1 exp_predictions = {'B':[1.0,1.0,1.0],'D':[1.0,1.0,1.0]} self.assertEqualItems(prediction,exp_predictions) #We don't expect variances to be exactly 10 fold increasing #but do expect they should be in rank order for tip in ['B','D']: tip_vars = variances[tip]['variance'] self.assertTrue(tip_vars[0]<tip_vars[1]) self.assertTrue(tip_vars[1]<tip_vars[2]) #Also note that trait D is on a much longer branch, so we expect #it to have higher variance self.assertTrue((array(variances['B']['variance'])<array(variances['D']['variance'])).all()) def test_fill_unknown_traits(self): """fill_unknown_traits should propagate only known characters""" # Only the missing values in to_update should be # filled in with appropriate values from new to_update = array([1,0,1,None,1,0]) new = array([None,None,1,1,1,1]) obs = fill_unknown_traits(to_update,new) exp = array([1,0,1,1,1,0]) self.assertTrue(array_equal(obs,exp)) #Try the reverse update obs = fill_unknown_traits(new,to_update) exp = array([1,0,1,1,1,1]) self.assertTrue(array_equal(obs,exp)) # Ensure that if to_update is None, the value of new is returned 
obs = fill_unknown_traits(None, new) #print "Obs:",obs exp = new self.assertTrue(array_equal(obs,exp)) def test_weighted_average_tip_prediction(self): """Weighted average node prediction should predict node values""" # When the node is very close to I3, prediction should be approx. I3 traits = self.PartialReconstructionTraits tree = assign_traits_to_tree(traits,self.CloseToI3Tree) node_to_predict = "A" node = tree.getNodeMatchingName(node_to_predict) most_recent_reconstructed_ancestor =\ get_most_recent_reconstructed_ancestor(node) prediction = weighted_average_tip_prediction(tree=tree,\ node=node,\ most_recent_reconstructed_ancestor=\ most_recent_reconstructed_ancestor) exp = traits["I3"] self.assertFloatEqual(around(prediction),exp) # When the node is very close to I1, prediction should be approx. I1 traits = self.PartialReconstructionTraits tree = assign_traits_to_tree(traits,self.CloseToI1Tree) node_to_predict = "A" #print "tree:",tree.asciiArt() node = tree.getNodeMatchingName(node_to_predict) most_recent_reconstructed_ancestor =\ get_most_recent_reconstructed_ancestor(node) prediction = weighted_average_tip_prediction(tree=tree,\ node=node,\ most_recent_reconstructed_ancestor=\ most_recent_reconstructed_ancestor) exp = traits["I1"] #print "prediction:",prediction #print "exp:",exp a_node = tree.getNodeMatchingName('A') #for node in tree.preorder(): # print node.Name,node.distance(a_node),node.Reconstruction self.assertFloatEqual(around(prediction),exp) # Try out the B case with exponential weighting traits = self.PartialReconstructionTraits tree = assign_traits_to_tree(traits,self.CloseToI3Tree) weight_fn = make_neg_exponential_weight_fn(exp_base=e) node_to_predict = "A" node = tree.getNodeMatchingName(node_to_predict) most_recent_reconstructed_ancestor =\ get_most_recent_reconstructed_ancestor(node) prediction = weighted_average_tip_prediction(tree=tree,\ node=node,\ most_recent_reconstructed_ancestor=\ most_recent_reconstructed_ancestor) #prediction = 
weighted_average_tip_prediction(tree=tree,\ # node_to_predict=node_to_predict,weight_fn=weight_fn) exp = traits["B"] self.assertFloatEqual(around(prediction),exp) # Try out the I1 case with exponential weighting traits = self.PartialReconstructionTraits tree = assign_traits_to_tree(traits,self.CloseToI1Tree) weight_fn = make_neg_exponential_weight_fn(exp_base=e) #weight_fn = linear_weight node_to_predict = "A" node = tree.getNodeMatchingName(node_to_predict) most_recent_reconstructed_ancestor =\ get_most_recent_reconstructed_ancestor(node) prediction = weighted_average_tip_prediction(tree=tree,\ node=node,\ most_recent_reconstructed_ancestor=\ most_recent_reconstructed_ancestor) exp = traits["I1"] self.assertFloatEqual(around(prediction),exp) # Try out the balanced case where children and ancestors # should be weighted a equally with exponential weighting # We'll try this with full gene count data to ensure # that case is tested traits = self.GeneCountTraits tree = assign_traits_to_tree(traits,self.BetweenI3AndI1Tree) weight_fn = make_neg_exponential_weight_fn(exp_base=e) node_to_predict = "A" node = tree.getNodeMatchingName(node_to_predict) most_recent_reconstructed_ancestor =\ get_most_recent_reconstructed_ancestor(node) prediction = weighted_average_tip_prediction(tree=tree,\ node=node,\ most_recent_reconstructed_ancestor=\ most_recent_reconstructed_ancestor) #prediction = weighted_average_tip_prediction(tree=tree,\ # node_to_predict=node_to_predict,weight_fn=weight_fn) exp = (array(traits["I1"]) + array(traits["I3"]))/2.0 self.assertFloatEqual(prediction,exp) #TODO: test the case with partial missing data (Nones) #TODO: test the case with fully missing data for either # the ancestor or the children. 
#TODO: Test with polytomy trees # These *should* work, but until they're tested we don't know def test_get_interval_z_prob(self): """get_interval_z_prob should get the probability of a Z-score on an interval""" #Approximate expected values were calculated from #the table of z-values found in: #Larson, Ron; Farber, Elizabeth (2004). #Elementary Statistics: Picturing the World. P. 214, #As recorded here: http://en.wikipedia.org/wiki/Standard_normal_table #-- Test 1 -- #Expected values for 0 - 0.01 obs = get_interval_z_prob(0.0,0.01) #Larson & Farber reported values are: #For z of 0.00, p= 0.5000 #For z of 0.01, p= 0.5040 exp = 0.0040 self.assertFloatEqual(obs,exp,eps=0.01) #Error is around 1e-5 from estimate #-- Test 2 -- # 0.75 - 0.80 obs = get_interval_z_prob(0.75,0.80) #Larson & Farber reported values are: #For z of 0.75, p= 0.7734 #For z of 0.80, p= 0.7881 exp = 0.7881 - 0.7734 self.assertFloatEqual(obs,exp,eps=0.01) def test_thresholded_brownian_probability(self): """Brownian prob should return dict of state probabilities""" #x = thresholded_brownian_probability(2.2755, 1001**0.5, 0.03, min_val = 0.0,increment = 1.00,trait_prob_cutoff = 1e-4) #lines = ["\t".join(map(str,[k,x[k]]))+"\n" for k in sorted(x.keys())] #for line in lines: # print line #print "Total prob:", sum(x.values()) start_state = 3.0 var = 30.00 d = 0.03 min_val = 0.0 increment = 1.0 trait_prob_cutoff = 1e-200 obs = thresholded_brownian_probability(start_state,d,var,min_val,\ increment,trait_prob_cutoff) #TODO: Need to calculate exact values for this minimal case #with the Larson & Farber Z tables, by hand. 
        #For now test for sanity
        #NOTE(review): these assertions conclude a test method whose def line
        #precedes this chunk (it matches test_thresholded_brownian_probability
        #as defined later in this file).
        #Test that no probabilities below threshold are included
        self.assertTrue(min(obs.values()) > trait_prob_cutoff)
        #Test that start values +1 or -1 are equal
        self.assertEqual(obs[2.0],obs[4.0])
        #Test that the start state is the highest prob value
        self.assertEqual(max(obs.values()),obs[start_state])

    def test_fit_normal_to_confidence_interval(self):
        """fit_normal_to_confidence_interval should return a mean and variance given CI"""
        #Lets use a normal distribution to generate test values.
        #ndtri is the inverse standard normal CDF, so mean +/- ndtri(0.95)
        #is exactly the 95% CI of a normal with variance 1.0 -- therefore
        #the fitted variance should come back as 1.0.
        normal_95 = ndtri(0.95)
        mean = 0
        upper = mean + normal_95
        lower = mean - normal_95
        obs_mean,obs_var =\
          fit_normal_to_confidence_interval(upper,lower,confidence=0.95)
        exp_mean = mean
        exp_var = 1.0
        self.assertFloatEqual(obs_mean,exp_mean)
        self.assertFloatEqual(obs_var,exp_var)

        #An alternative normal: nonzero mean, 99% confidence interval
        normal_99 = ndtri(0.99)
        mean = 5.0
        upper = mean + normal_99
        lower = mean - normal_99
        obs_mean,obs_var =\
          fit_normal_to_confidence_interval(upper,lower,confidence=0.99)
        exp_mean = mean
        exp_var = 1.0
        self.assertFloatEqual(obs_mean,exp_mean)
        self.assertFloatEqual(obs_var,exp_var)

    def test_variance_of_weighted_mean(self):
        """variance_of_weighted_mean calculates the variance of a weighted mean"""
        #Just a hand calculated example using the formula from here:
        #http://en.wikipedia.org/wiki/Weighted_mean
        #TODO: test if this works for arrays of variances

        #If all weights and standard deviations are equal, then
        #variance = stdev/sqrt(n)
        weights = array([0.5,0.5])
        sample_stdevs = array([4.0,4.0])
        variances = sample_stdevs**2
        exp = 4.0/sqrt(2.0)
        obs = variance_of_weighted_mean(weights,variances)
        self.assertFloatEqual(obs,exp)

        #If standard deviations are equal, but weights are not, the result
        #is equal to stdev*sqrt(sum(squared_weights))
        weights = array([0.1,0.9])
        sample_stdevs = array([4.0,4.0])
        variances = sample_stdevs**2
        exp_unbalanced = 4.0*sqrt(sum(weights**2))
        obs = variance_of_weighted_mean(weights,variances)
        self.assertEqual(obs,exp_unbalanced)

        #If all standard deviations are equal:
        #The minimal value for the variance is when all weights are equal
        #the maximal value is when one weight is 1.0 and another is 0.0
        sample_variances = array([3.0,3.0,3.0,3.0])
        balanced_weights = array([0.25,0.25,0.25,0.25])
        two_weights = array([0.0,0.50,0.50,0.0])
        unbalanced_weights = array([0.0,1.0,0.0,0.0])
        balanced_variance = variance_of_weighted_mean(balanced_weights,sample_variances)
        two_weight_variance = variance_of_weighted_mean(two_weights,sample_variances)
        unbalanced_variance = variance_of_weighted_mean(unbalanced_weights,sample_variances)
        #We expect balanced_variance < two-weight_variance < unbalanced_variance
        self.assertTrue(balanced_variance < two_weight_variance)
        self.assertTrue(balanced_variance < unbalanced_variance)
        self.assertTrue(two_weight_variance < unbalanced_variance)

        #Check that doing this for two 1D arrays is equal to using a single 2d array
        weights1 = array([0.1,0.9])
        weights2 = array([0.5,0.5])
        vars1 = array([4.0,4.0])
        vars2 = array([1000.0,1000.0])
        obs1 = variance_of_weighted_mean(weights1,vars1)
        obs2 = variance_of_weighted_mean(weights2,vars2)
        #Expect that calculating the result as a single 2D array
        #gives identical results to calculating as two 1D arrays
        exp = array([obs1,obs2])
        combined_weights = array([[0.1,0.9],[0.5,0.5]])
        combined_vars = array([[4.0,4.0],[1000.0,1000.0]])
        combined_obs = variance_of_weighted_mean(combined_weights,combined_vars)
        self.assertFloatEqual(combined_obs,exp)

    def test_normal_product_monte_carlo(self):
        """normal_product_monte_carlo calculates the confidence limits of two normal distributions empirically"""
        # Need good test data here.
        #The APPL statistical language apparently has an analytical
        # solution to the product normal that could be used
        #NOTE(review): every assertion in this method is commented out, so it
        #currently only verifies that normal_product_monte_carlo runs without
        #raising.  Reinstate the assertions once reference values are agreed.

        #Result for product of two standard normal distributions
        lower,upper = normal_product_monte_carlo(0.0,1.0,0.0,1.0)
        #print "95% confidence limit for product of two standard normal distributions:",lower,upper
        # 1.60 corresponds to the value for the 0.10 (10%) confidence limit
        #when using a two-tailed test.
        #Therefore for the one tailed upper limit, I believe we expect 1.60 to
        #correspond to a type I error rate of 0.05
        #self.assertFloatEqual(lower,-1.60,eps=.1)
        #self.assertFloatEqual(upper,1.60,eps=.1)

        #result = normal_product_monte_carlo(1.0/3.0,1.0,2.0,1.0)
        #print result
        mean1 = 0.4
        mean2 = 1.2
        v1 = 1.0
        v2 = 1.0
        lower,upper = normal_product_monte_carlo(mean1,v1,mean2,\
          v2,confidence=0.95)
        #print "confidence limit for product of two normal distributions:",\
        #  lower,upper
        lower_estimate = mean1*mean2 + lower
        upper_estimate = mean1*mean2 + upper
        #self.assertFloatEqual(lower_estimate,-1.8801,eps=.1)
        #self.assertFloatEqual(upper_estimate,2.3774,eps=.1)

    def test_get_bounds_from_histogram(self):
        """Get bounds from histogram finds upper and lower tails of distribution at specified confidence levels"""
        #Test a simple array
        test_hist = array([0.01,0.98,0.01])
        test_bin_edges = arange(3)
        obs_lower,obs_upper = get_bounds_from_histogram(test_hist,test_bin_edges,confidence=0.90)
        #Upper and lower bounds should be conservative, and therefore exclude the center
        exp_lower = 1
        exp_upper = 2
        self.assertFloatEqual(obs_lower,exp_lower)
        self.assertFloatEqual(obs_upper,exp_upper)
        # Confirm that summing the histogram over given indices
        # gives <= confidence % of the mass
        obs_sum_lower = sum(test_hist[:obs_lower])
        self.assertTrue(obs_sum_lower <= 0.05*sum(test_hist))
        obs_sum_upper = sum(test_hist[obs_upper:])
        self.assertTrue(obs_sum_upper <= 0.05*sum(test_hist))

        #Repeat for a more complex test case
        test_hist =array([1.0,2.0,0.0,5.0,25.0,2.0,50.0,10.0,5.0,1.0])
        test_bin_edges = array(arange(len(test_hist)+1))
        obs_lower,obs_upper = get_bounds_from_histogram(test_hist,test_bin_edges,confidence=0.90)
        exp_lower = 3
        exp_upper = 9
        self.assertFloatEqual(obs_lower,exp_lower)
        self.assertFloatEqual(obs_upper,exp_upper)
        obs_sum_lower = sum(test_hist[:obs_lower])
        self.assertTrue(obs_sum_lower <= 0.05*sum(test_hist))
        obs_sum_upper = sum(test_hist[obs_upper:])
        self.assertTrue(obs_sum_upper <= 0.05*sum(test_hist))

    def test_get_brownian_motion_param_from_confidence_intervals(self):
        """Get brownian motion parameters from confidence intervals"""
        #TODO: Ensure this works with arrays of brownian motions
        tree = self.SimpleTree

        #Test one-trait case
        traits = {"A":[1.0],"C":[2.0],"E":[1.0],"F":[1.0]}
        tree = assign_traits_to_tree(traits,tree,trait_label="Reconstruction")
        tree.getNodeMatchingName('E').upper_bound = [2.0]
        tree.getNodeMatchingName('F').upper_bound = [1.0]
        tree.getNodeMatchingName('E').lower_bound = [0.0]
        tree.getNodeMatchingName('F').lower_bound = [1.0]
        brownian_motion_parameter =\
          get_brownian_motion_param_from_confidence_intervals(tree,\
          upper_bound_trait_label="upper_bound",\
          lower_bound_trait_label="lower_bound",\
          trait_label="Reconstruction",\
          confidence=0.95)
        #self.assertFloatEqual(brownian_motion_parameter,[1.0])
        #Only the length of the result is asserted; the exact expected
        #value above is commented out pending hand-verified reference data.
        self.assertEqual(len(brownian_motion_parameter),1)

        #Test two-trait case
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        result_tree = assign_traits_to_tree(traits,tree,trait_label="Reconstruction")
        true_brownian_motion_param = 5.0
        #E_histogram = thresholded_brownian_probability(1.0,\
        #  true_brownian_motion_param,d=0.01)
        #E_true_lower,E_true_upper = get_bounds_from_histogram(E_histogram,test_bin_edges,confidence=0.95)

        #set up tree with confidence intervals
        #{"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]}
        #DndParser("((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;")
        tree.getNodeMatchingName('E').upper_bound = [1.0,1.0]
        tree.getNodeMatchingName('F').upper_bound = [1.0,2.0]
        tree.getNodeMatchingName('E').lower_bound = [-2.0,-2.0]
        tree.getNodeMatchingName('F').lower_bound = [-1.0,0.0]
        brownian_motion_parameter =\
          get_brownian_motion_param_from_confidence_intervals(tree,\
          upper_bound_trait_label="upper_bound",\
          lower_bound_trait_label="lower_bound",\
          trait_label="Reconstruction",\
          confidence=0.95)
        #self.assertFloatEqual(brownian_motion_parameter,[1.0,1.0])
        self.assertEqual(len(brownian_motion_parameter),2)
class TestPredictTraits(TestCase):
    """Tests of predict_traits.py"""

    def setUp(self):
        #A minimal ultrametric-ish test tree with annotated internal
        #nodes E and F; used by most point-prediction tests below.
        self.SimpleTree = \
          DndParser("((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;")

        #Set up a tree with obvious differences in the rate of gene content
        #evolution to test confidence interval estimation
        #Features:
        # --trait 1 is has ~ 10 fold higher confidence intervals than trait 0.
        #   Trait 2 is 10 fold higher than trait 1
        # -- of predicted nodes B and D, D has a ~10 fold longer branch
        self.SimpleUnequalVarianceTree =\
          DndParser("((A:0.01,B:0.01)E:0.05,(C:0.01,D:0.10)F:0.05)root;")
        traits = {"A":[1.0,1.0,1.0],"C":[1.0,1.0,1.0],"E":[1.0,1.0,1.0],"F":[1.0,1.0,1.0]}
        self.SimpleUnequalVarianceTree = assign_traits_to_tree(traits,\
          self.SimpleUnequalVarianceTree,trait_label="Reconstruction")
        self.SimpleUnequalVarianceTree.getNodeMatchingName('E').upper_bound = [2.0,20.0,200.0]
        self.SimpleUnequalVarianceTree.getNodeMatchingName('E').lower_bound = [-1.0,-19.0,-199.0]
        self.SimpleUnequalVarianceTree.getNodeMatchingName('F').upper_bound = [2.0,20.0,200.0]
        self.SimpleUnequalVarianceTree.getNodeMatchingName('F').lower_bound = [-1.0,-19.0,-199.0]

        #Set up a tree with a three-way polytomy
        self.SimplePolytomyTree = \
          DndParser("((A:0.02,B:0.01,B_prime:0.03)E:0.05,(C:0.01,D:0.01)F:0.05)root;")

        #Traits for SimpleTree: note B and C deliberately have no traits,
        #so they are the nodes to be predicted.
        self.SimpleTreeTraits =\
          {"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]}

        #Trees that place node A at varying distances from the annotated
        #ancestors I3 and I1, to check that predictions track the nearest
        #reconstructed neighbor.
        self.PartialReconstructionTree =\
          DndParser("((((B:0.01,C:0.01)I3:0.01,A:0.01)I2:0.01,D:0.01)I1:0.01)root;")

        self.CloseToI3Tree =\
          DndParser("((((B:0.01,C:0.95)I3:0.01,A:0.01)I2:0.95,D:0.05)I1:0.95)root;")

        self.CloseToI1Tree =\
          DndParser("((((B:0.95,C:0.95)I3:0.95,A:0.01)I2:0.02,D:0.05)I1:0.05)root;")

        self.BetweenI3AndI1Tree=\
          DndParser("((((B:0.01,C:0.1)I3:0.02,A:0.01)I2:0.02,D:0.05)I1:0.02)root;")

        self.PartialReconstructionTraits =\
          {"B":[1.0,1.0],"C":[1.0,1.0],"I3":[1.0,1.0],"I1":[0.0,1.0],"D":[0.0,1.0]}

        self.GeneCountTraits =\
          {"B":[1.0,1.0],"C":[1.0,2.0],"I3":[1.0,1.0],"I1":[0.0,3.0],"D":[0.0,5.0]}

        #create a tmp trait file
        self.in_trait1_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv')
        self.in_trait1_file=open(self.in_trait1_fp,'w')
        self.in_trait1_file.write(in_trait1)
        self.in_trait1_file.close()

        #create another tmp trait file (with columns in different order)
        self.in_trait2_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv')
        self.in_trait2_file=open(self.in_trait2_fp,'w')
        self.in_trait2_file.write(in_trait2)
        self.in_trait2_file.close()

        #create a tmp trait file with a incorrect trait name
        self.in_bad_trait_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv')
        self.in_bad_trait_file=open(self.in_bad_trait_fp,'w')
        self.in_bad_trait_file.write(in_bad_trait)
        self.in_bad_trait_file.close()

        self.files_to_remove = [self.in_trait1_fp,self.in_trait2_fp,self.in_bad_trait_fp]

    def tearDown(self):
        #Clean up the temp trait tables created in setUp.
        remove_files(self.files_to_remove)

    def test_nearest_neighbor_prediction(self):
        """nearest_neighbor_prediction predicts nearest neighbor's traits"""
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        result_tree = assign_traits_to_tree(traits,tree,trait_label="Reconstruction")

        #Test with default options
        results = predict_nearest_neighbor(tree, nodes_to_predict =["B","C"])
        self.assertEqual(results["B"],array([1.0,1.0]))
        self.assertEqual(results["C"],array([0.0,0.0]))

        #Test allowing ancestral NNs
        results = predict_nearest_neighbor(tree, nodes_to_predict =["B","C"],\
          tips_only = False)
        self.assertEqual(results["C"],array([0.0,1.0]))

        #Test allowing self to be NN AND Ancestral NNs
        results = predict_nearest_neighbor(tree, nodes_to_predict =["A","B","C","D"],\
          tips_only = False,use_self_in_prediction=True)
        self.assertEqual(results["A"],array([1.0,1.0]))
        self.assertEqual(results["B"],array([1.0,1.0]))
        self.assertEqual(results["C"],array([0.0,1.0]))
        self.assertEqual(results["D"],array([0.0,0.0]))

    def test_calc_nearest_sequenced_taxon_index(self):
        """calc_nearest_sequenced_taxon_index calculates the NSTI measure"""
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        result_tree = assign_traits_to_tree(traits,tree,trait_label="Reconstruction")
        #Expected distances:
        # A --> A 0.0
        # B --> A 0.03
        # C --> D 0.02
        # D --> D 0.0
        # = 0.05/4.0 = 0.0125
        exp = 0.0125
        verbose = False
        #Test with default options
        obs_nsti,obs_distances = calc_nearest_sequenced_taxon_index(tree,verbose=verbose)
        self.assertFloatEqual(obs_nsti,exp)
        self.assertFloatEqual(obs_distances["A"],0.0)
        self.assertFloatEqual(obs_distances["B"],0.03)
        self.assertFloatEqual(obs_distances["C"],0.02)
        self.assertFloatEqual(obs_distances["D"],0.00)

        #Test calcing the index while
        #limiting prediction to B and C
        # B --> A 0.03
        # C --> D 0.02
        exp = 0.025
        obs_nsti,obs_distances = calc_nearest_sequenced_taxon_index(tree,\
          limit_to_tips = ["B","C"],verbose=False)
        self.assertFloatEqual(obs_nsti,exp)
        self.assertFloatEqual(obs_distances["B"],0.03)
        self.assertFloatEqual(obs_distances["C"],0.02)

    def test_get_nn_by_tree_descent(self):
        """get_nn_by_tree_descent finds the nearest annotated neighbor by descending the tree"""
        #NOTE(review): this docstring previously duplicated the one from
        #test_calc_nearest_sequenced_taxon_index (copy-paste); corrected to
        #describe the function actually under test.
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        result_tree = assign_traits_to_tree(traits,tree,trait_label="Reconstruction")
        #Expected distances:
        # A --> A 0.0
        # B --> A 0.03
        # C --> D 0.02
        # D --> D 0.0
        # = 0.05/4.0 = 0.0125
        exp = 0.0125
        #Test with default options
        nn,distance = get_nn_by_tree_descent(tree,"B",verbose=True)
        self.assertEqual(nn.Name,"A")
        self.assertFloatEqual(distance,0.03)

        nn,distance = get_nn_by_tree_descent(tree,"A",verbose=True)
        self.assertEqual(nn.Name,"A")
        self.assertFloatEqual(distance,0.00)

        #With filter_by_property=False, A's nearest neighbor is B rather
        #than itself
        nn,distance = get_nn_by_tree_descent(tree,"A",filter_by_property=False,verbose=True)
        self.assertEqual(nn.Name,"B")
        self.assertFloatEqual(distance,0.03)

        nn,distance = get_nn_by_tree_descent(tree,"C",verbose=True)
        self.assertEqual(nn.Name,"D")
        self.assertFloatEqual(distance,0.02)

        #self.assertFloatEqual(obs_distances["A"],0.0)
        #self.assertFloatEqual(obs_distances["B"],0.03)
        #self.assertFloatEqual(obs_distances["C"],0.02)
        #self.assertFloatEqual(obs_distances["D"],0.00)

        #NOTE(review): the remainder of this method appears to be copy-paste
        #from test_calc_nearest_sequenced_taxon_index -- it re-tests that
        #function here.  Harmless, but a candidate for removal.
        #Test calcing the index while
        #limiting prediction to B and C
        # B --> A 0.03
        # C --> D 0.02
        exp = 0.025
        obs_nsti,obs_distances = calc_nearest_sequenced_taxon_index(tree,\
          limit_to_tips = ["B","C"],verbose=False)
        self.assertFloatEqual(obs_nsti,exp)
        self.assertFloatEqual(obs_distances["B"],0.03)
        self.assertFloatEqual(obs_distances["C"],0.02)

    def test_predict_random_neighbor(self):
        """predict_random_neighbor predicts randomly"""
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        result_tree = assign_traits_to_tree(traits,tree)
        #If there is only one other valid result, this
        #should always be predicted

        #self.SimpleTreeTraits =\
        # {"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]}

        #If self predictions are disallowed, then the prediction for A should
        #always come from node D, and be 0,0.
        results = predict_random_neighbor(tree,['A'],\
          trait_label = "Reconstruction",\
          use_self_in_prediction=False)
        self.assertEqual(results['A'],[0.0,0.0])

        #If use_self is True, ~50% of predictions should be [1.0,1.0] and
        # half should be [0.0,0.0]
        #Pick repeatedly and make sure frequencies are
        #reasonable. The technique is fast, so
        #many iterations are reasonable.
        iterations = 100000
        a_predictions = 0
        d_predictions = 0
        for i in range(iterations):
            results = predict_random_neighbor(tree,['A'],\
              trait_label = "Reconstruction",\
              use_self_in_prediction=True)
            #print results
            if results['A'] == [1.0,1.0]:
                #print "A pred"
                a_predictions += 1
            elif results['A'] == [0.0,0.0]:
                #print "D pred"
                d_predictions +=1
            else:
                raise RuntimeError(\
                  "Bad prediction result: Neither node A nor node D traits used in prediction")
        #print "All a predictions:",a_predictions
        #print "All d predictions:",d_predictions
        ratio = float(a_predictions)/float(iterations)
        #print "Ratio:", ratio
        #Statistical check: with 100000 draws the A-fraction should be
        #within 1% of 0.5 (this test is stochastic but very unlikely to
        #produce a false failure at eps=1e-2).
        self.assertFloatEqual(ratio,0.5,eps=1e-2)

    def test_get_nearest_annotated_neightbor(self):
        """get_nearest_annotated_neighbor finds nearest relative with traits"""
        #NOTE(review): 'neightbor' in the method name is a typo; preserved so
        #any external references to this test by name keep working.
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        result_tree = assign_traits_to_tree(traits,tree)

        #Test ancestral NN matching
        nn = get_nearest_annotated_neighbor(tree,'A',\
          tips_only=False, include_self=False)
        self.assertEqual(nn.Name,'E')

        nn = get_nearest_annotated_neighbor(tree,'B',\
          tips_only=False, include_self=False)
        self.assertEqual(nn.Name,'E')

        nn = get_nearest_annotated_neighbor(tree,'C',\
          tips_only=False, include_self=False)
        self.assertEqual(nn.Name,'F')

        nn = get_nearest_annotated_neighbor(tree,'D',\
          tips_only=False, include_self=False)
        self.assertEqual(nn.Name,'F')

        #Test tip only, non-self matching
        nn = get_nearest_annotated_neighbor(tree,'A',\
          tips_only=True, include_self=False)
        self.assertEqual(nn.Name,'D')

        nn = get_nearest_annotated_neighbor(tree,'B',\
          tips_only=True, include_self=False)
        self.assertEqual(nn.Name,'A')

        nn = get_nearest_annotated_neighbor(tree,'C',\
          tips_only=True, include_self=False)
        self.assertEqual(nn.Name,'D')

        nn = get_nearest_annotated_neighbor(tree,'D',\
          tips_only=True, include_self=False)
        self.assertEqual(nn.Name,'A')

    def test_biom_table_from_predictions(self):
        """format predictions into biom format"""
        #NOTE(review): this test only checks that biom_table_from_predictions
        #runs without raising; it makes no assertions on the table contents.
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        #print "Starting tree:",tree.asciiArt()
        # Test on simple tree
        result_tree = assign_traits_to_tree(traits,tree)
        nodes_to_predict = [n.Name for n in result_tree.tips()]
        #print "Predicting nodes:", nodes_to_predict
        predictions = predict_traits_from_ancestors(result_tree,\
          nodes_to_predict)
        biom_table=biom_table_from_predictions(predictions,["trait1","trait2"])

    def test_equal_weight(self):
        """constant_weight weights by a constant"""
        w = 1.0
        d = 0.1
        #equal_weight should return the same weight regardless of distance
        for i in range(100):
            obs = equal_weight(i)
            exp = w
            self.assertFloatEqual(obs,exp)

    def test_make_neg_exponential_weight_fn(self):
        """make_neg_exponential_weight_fn returns the specified fn"""
        #The generated function should compute base**(-distance)
        exp_base = 10
        weight_fn = make_neg_exponential_weight_fn(exp_base)
        d = 10.0
        obs = weight_fn(d)
        exp = 10.0**-10.0
        self.assertFloatEqual(obs,exp)

        #Test for base two
        exp_base = 2
        weight_fn = make_neg_exponential_weight_fn(exp_base)
        d = 16.0
        obs = weight_fn(d)
        exp = 2.0**-16.0
        self.assertFloatEqual(obs,exp)

    def test_linear_weight(self):
        """linear_weight weights linearly"""
        #Weight should fall linearly from 1.0 at d=0 to 0.0 at d=max_d
        max_d = 1.0
        d = 0.90
        obs = linear_weight(d,max_d)
        exp = 0.10
        self.assertFloatEqual(obs, exp)

        d = 0.0
        obs = linear_weight(d,max_d)
        exp = 1.0
        self.assertFloatEqual(obs, exp)

        max_d = 3.0
        d = 1.5
        obs = linear_weight(d,max_d)
        exp = 0.50
        self.assertFloatEqual(obs, exp)

    def test_inverse_variance_weight(self):
        """inverse_variance_weight"""
        #TODO: test this works with arrays of variances
        #Weight should be 1/variance, independent of distance d
        var = 1000.0
        for d in range(1,10):
            d = float(d)
            obs = inverse_variance_weight(d,var)
            exp = 1.0/1000.0
            self.assertFloatEqual(obs,exp)

        #Now test the special case of zero variance
        #(expected to fall back to a 1e-10 floor rather than divide by zero)
        var = 0.0
        for d in range(1,10):
            d = float(d)
            obs = inverse_variance_weight(d,var)
            exp = 1.0/1e-10
            self.assertFloatEqual(obs,exp)

    def test_assign_traits_to_tree(self):
        """assign_traits_to_tree should map reconstructed traits to tree nodes"""
        # Test that the function assigns traits from a dict to a tree node
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree

        # Test on simple tree
        result_tree = assign_traits_to_tree(traits,tree)
        # Test that each node is assigned correctly
        #(nodes absent from the traits dict should get None)
        for node in result_tree.preorder():
            obs = node.Reconstruction
            exp = traits.get(node.Name, None)
            self.assertEqual(obs,exp)

        # Test on polytomy tree
        tree = self.SimplePolytomyTree
        result_tree = assign_traits_to_tree(traits,tree)
        # Test that each node is assigned correctly
        for node in result_tree.preorder():
            obs = node.Reconstruction
            exp = traits.get(node.Name, None)
            self.assertEqual(obs,exp)

    def test_assign_traits_to_tree_quoted_node_name(self):
        """Assign_traits_to_tree should remove quotes from node names"""
        # Test that the function assigns traits from a dict to a tree node
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        #Make one node quoted
        tree.getNodeMatchingName('A').Name="'A'"
        tree.getNodeMatchingName('B').Name='"B"'

        # Test on simple tree
        result_tree = assign_traits_to_tree(traits,tree,fix_bad_labels=True)
        #Setting fix_bad_labels to false produces NoneType predictions when
        #labels are quoted

        # Test that each node is assigned correctly
        for node in result_tree.preorder():
            obs = node.Reconstruction
            exp = traits.get(node.Name.strip("'").strip('"'), None)
            self.assertEqual(obs,exp)

        # Test on polytomy tree
        tree = self.SimplePolytomyTree
        result_tree = assign_traits_to_tree(traits,tree)
        # Test that each node is assigned correctly
        for node in result_tree.preorder():
            obs = node.Reconstruction
            exp = traits.get(node.Name, None)
            self.assertEqual(obs,exp)

    def test_update_trait_dict_from_file(self):
        """update_trait_dict_from_file should parse input trait tables (asr and genome) and match traits between them"""
        header,traits=update_trait_dict_from_file(self.in_trait1_fp)
        self.assertEqual(header,["trait2","trait1"])
        self.assertEqual(traits,{3:[3,1],'A':[5,2.5],'D':[5,2]})

        #test that we get a warning when header from other trait table doesn't match perfectly.
        #NOTE(review): the extent of this catch_warnings block was
        #reconstructed from a formatting-damaged source -- confirm against
        #version control if warnings stop being captured as expected.
        with catch_warnings(record=True) as w:
            header2,traits2=update_trait_dict_from_file(self.in_trait2_fp,header)
            self.assertEqual(header2,["trait2","trait1"])
            self.assertEqual(traits2,{1:[3,1], 2:[3,0], 3:[3,2]})
            assert len(w) == 1
            assert issubclass(w[-1].category, UserWarning)
            assert "Missing" in str(w[-1].message)

        #try giving a trait table with a trait that doesn't match our header
        self.assertRaises(RuntimeError,update_trait_dict_from_file,self.in_bad_trait_fp,header)

    def test_predict_traits_from_ancestors(self):
        """predict_traits_from_ancestors should propagate ancestral states"""
        # Testing the point predictions first (since these are easiest)

        # When the node is very close to I3, prediction should be approx. I3
        traits = self.PartialReconstructionTraits
        tree = assign_traits_to_tree(traits,self.CloseToI3Tree)
        nodes_to_predict = ['A']
        prediction = predict_traits_from_ancestors(tree=tree,\
          nodes_to_predict=nodes_to_predict)
        exp = traits["I3"]
        #print "PREDICTION:",prediction
        for node in nodes_to_predict:
            self.assertFloatEqual(around(prediction[node]),exp)

        #TODO: need to add test case where a very hard to predict
        # single value is present in a sequenced genome. Then
        # test that use_self_in_prediction controls whether this is used

    def test_predict_traits_from_ancestors_correctly_predicts_variance(self):
        """predict_traits_from_ancestors should correctly report variance due to branch lengths and rates of gene copy number evolution """
        tree = self.SimpleUnequalVarianceTree
        #All values are 1, but variance in the prediction should vary
        #due to vary unequal branch lengths (between taxa) and brownian
        #motion parameters (between traits)
        nodes_to_predict = ['B','D']
        bm_fixed_10_fold = [1.0,10.0,100.0]
        prediction,variances,confidence_intervals = predict_traits_from_ancestors(tree=tree,\
          nodes_to_predict=nodes_to_predict,calc_confidence_intervals=True,\
          lower_bound_trait_label='lower_bound',upper_bound_trait_label='upper_bound',
          brownian_motion_parameter = bm_fixed_10_fold,trait_label="Reconstruction")

        #All traits are 1, so all predictions should be 1
        exp_predictions = {'B':[1.0,1.0,1.0],'D':[1.0,1.0,1.0]}
        self.assertEqualItems(prediction,exp_predictions)

        #We don't expect variances to be exactly 10 fold increasing
        #but do expect they should be in rank order
        for tip in ['B','D']:
            tip_vars = variances[tip]['variance']
            self.assertTrue(tip_vars[0]<tip_vars[1])
            self.assertTrue(tip_vars[1]<tip_vars[2])

        #Also note that trait D is on a much longer branch, so we expect
        #it to have higher variance
        self.assertTrue((array(variances['B']['variance'])<array(variances['D']['variance'])).all())

    def test_fill_unknown_traits(self):
        """fill_unknown_traits should propagate only known characters"""
        # Only the missing values in to_update should be
        # filled in with appropriate values from new
        to_update = array([1,0,1,None,1,0])
        new = array([None,None,1,1,1,1])
        obs = fill_unknown_traits(to_update,new)
        exp = array([1,0,1,1,1,0])
        self.assertTrue(array_equal(obs,exp))

        #Try the reverse update
        obs = fill_unknown_traits(new,to_update)
        exp = array([1,0,1,1,1,1])
        self.assertTrue(array_equal(obs,exp))

        # Ensure that if to_update is None, the value of new is returned
        obs = fill_unknown_traits(None, new)
        #print "Obs:",obs
        exp = new
        self.assertTrue(array_equal(obs,exp))

    def test_weighted_average_tip_prediction(self):
        """Weighted average node prediction should predict node values"""

        # When the node is very close to I3, prediction should be approx. I3
        traits = self.PartialReconstructionTraits
        tree = assign_traits_to_tree(traits,self.CloseToI3Tree)
        node_to_predict = "A"
        node = tree.getNodeMatchingName(node_to_predict)
        most_recent_reconstructed_ancestor =\
          get_most_recent_reconstructed_ancestor(node)
        prediction = weighted_average_tip_prediction(tree=tree,\
          node=node,\
          most_recent_reconstructed_ancestor=\
          most_recent_reconstructed_ancestor)
        exp = traits["I3"]
        self.assertFloatEqual(around(prediction),exp)

        # When the node is very close to I1, prediction should be approx. I1
        traits = self.PartialReconstructionTraits
        tree = assign_traits_to_tree(traits,self.CloseToI1Tree)
        node_to_predict = "A"
        #print "tree:",tree.asciiArt()
        node = tree.getNodeMatchingName(node_to_predict)
        most_recent_reconstructed_ancestor =\
          get_most_recent_reconstructed_ancestor(node)
        prediction = weighted_average_tip_prediction(tree=tree,\
          node=node,\
          most_recent_reconstructed_ancestor=\
          most_recent_reconstructed_ancestor)
        exp = traits["I1"]
        #print "prediction:",prediction
        #print "exp:",exp
        a_node = tree.getNodeMatchingName('A')
        #for node in tree.preorder():
        #    print node.Name,node.distance(a_node),node.Reconstruction
        self.assertFloatEqual(around(prediction),exp)

        # Try out the B case with exponential weighting
        #NOTE(review): weight_fn is constructed here but never passed to
        #weighted_average_tip_prediction (the call that used it is commented
        #out below) -- confirm whether the exponential-weighting path is
        #actually exercised.
        traits = self.PartialReconstructionTraits
        tree = assign_traits_to_tree(traits,self.CloseToI3Tree)
        weight_fn = make_neg_exponential_weight_fn(exp_base=e)
        node_to_predict = "A"
        node = tree.getNodeMatchingName(node_to_predict)
        most_recent_reconstructed_ancestor =\
          get_most_recent_reconstructed_ancestor(node)
        prediction = weighted_average_tip_prediction(tree=tree,\
          node=node,\
          most_recent_reconstructed_ancestor=\
          most_recent_reconstructed_ancestor)
        #prediction = weighted_average_tip_prediction(tree=tree,\
        #    node_to_predict=node_to_predict,weight_fn=weight_fn)
        exp = traits["B"]
        self.assertFloatEqual(around(prediction),exp)

        # Try out the I1 case with exponential weighting
        traits = self.PartialReconstructionTraits
        tree = assign_traits_to_tree(traits,self.CloseToI1Tree)
        weight_fn = make_neg_exponential_weight_fn(exp_base=e)
        #weight_fn = linear_weight
        node_to_predict = "A"
        node = tree.getNodeMatchingName(node_to_predict)
        most_recent_reconstructed_ancestor =\
          get_most_recent_reconstructed_ancestor(node)
        prediction = weighted_average_tip_prediction(tree=tree,\
          node=node,\
          most_recent_reconstructed_ancestor=\
          most_recent_reconstructed_ancestor)
        exp = traits["I1"]
        self.assertFloatEqual(around(prediction),exp)

        # Try out the balanced case where children and ancestors
        # should be weighted a equally with exponential weighting
        # We'll try this with full gene count data to ensure
        # that case is tested
        traits = self.GeneCountTraits
        tree = assign_traits_to_tree(traits,self.BetweenI3AndI1Tree)
        weight_fn = make_neg_exponential_weight_fn(exp_base=e)
        node_to_predict = "A"
        node = tree.getNodeMatchingName(node_to_predict)
        most_recent_reconstructed_ancestor =\
          get_most_recent_reconstructed_ancestor(node)
        prediction = weighted_average_tip_prediction(tree=tree,\
          node=node,\
          most_recent_reconstructed_ancestor=\
          most_recent_reconstructed_ancestor)
        #prediction = weighted_average_tip_prediction(tree=tree,\
        #    node_to_predict=node_to_predict,weight_fn=weight_fn)
        exp = (array(traits["I1"]) + array(traits["I3"]))/2.0
        self.assertFloatEqual(prediction,exp)

        #TODO: test the case with partial missing data (Nones)
        #TODO: test the case with fully missing data for either
        # the ancestor or the children.
        #TODO: Test with polytomy trees
        # These *should* work, but until they're tested we don't know

    def test_get_interval_z_prob(self):
        """get_interval_z_prob should get the probability of a Z-score on an interval"""
        #Approximate expected values were calculated from
        #the table of z-values found in:
        #Larson, Ron; Farber, Elizabeth (2004).
        #Elementary Statistics: Picturing the World. P. 214,
        #As recorded here: http://en.wikipedia.org/wiki/Standard_normal_table

        #-- Test 1 --
        #Expected values for 0 - 0.01
        obs = get_interval_z_prob(0.0,0.01)
        #Larson & Farber reported values are:
        #For z of 0.00, p= 0.5000
        #For z of 0.01, p= 0.5040
        exp = 0.0040
        self.assertFloatEqual(obs,exp,eps=0.01)
        #Error is around 1e-5 from estimate

        #-- Test 2 --
        # 0.75 - 0.80
        obs = get_interval_z_prob(0.75,0.80)
        #Larson & Farber reported values are:
        #For z of 0.75, p= 0.7734
        #For z of 0.80, p= 0.7881
        exp = 0.7881 - 0.7734
        self.assertFloatEqual(obs,exp,eps=0.01)

    def test_thresholded_brownian_probability(self):
        """Brownian prob should return dict of state probabilities"""
        #x = thresholded_brownian_probability(2.2755, 1001**0.5, 0.03, min_val = 0.0,increment = 1.00,trait_prob_cutoff = 1e-4)
        #lines = ["\t".join(map(str,[k,x[k]]))+"\n" for k in sorted(x.keys())]
        #for line in lines:
        #    print line
        #print "Total prob:", sum(x.values())
        start_state = 3.0
        var = 30.00
        d = 0.03
        min_val = 0.0
        increment = 1.0
        trait_prob_cutoff = 1e-200
        obs = thresholded_brownian_probability(start_state,d,var,min_val,\
          increment,trait_prob_cutoff)
        #TODO: Need to calculate exact values for this minimal case
        #with the Larson & Farber Z tables, by hand.
        #For now test for sanity
        #Test that no probabilities below threshold are included
        self.assertTrue(min(obs.values()) > trait_prob_cutoff)
        #Test that start values +1 or -1 are equal
        #(the Brownian spread around the start state should be symmetric)
        self.assertEqual(obs[2.0],obs[4.0])
        #Test that the start state is the highest prob value
        self.assertEqual(max(obs.values()),obs[start_state])

    def test_fit_normal_to_confidence_interval(self):
        """fit_normal_to_confidence_interval should return a mean and variance given CI"""
        #Lets use a normal distribution to generate test values.
        #ndtri is the inverse standard normal CDF, so mean +/- ndtri(0.95)
        #is exactly the 95% CI of a normal with variance 1.0.
        normal_95 = ndtri(0.95)
        mean = 0
        upper = mean + normal_95
        lower = mean - normal_95
        obs_mean,obs_var =\
          fit_normal_to_confidence_interval(upper,lower,confidence=0.95)
        exp_mean = mean
        exp_var = 1.0
        self.assertFloatEqual(obs_mean,exp_mean)
        self.assertFloatEqual(obs_var,exp_var)

        #An alternative normal: nonzero mean, 99% confidence interval
        normal_99 = ndtri(0.99)
        mean = 5.0
        upper = mean + normal_99
        lower = mean - normal_99
        obs_mean,obs_var =\
          fit_normal_to_confidence_interval(upper,lower,confidence=0.99)
        exp_mean = mean
        exp_var = 1.0
        self.assertFloatEqual(obs_mean,exp_mean)
        self.assertFloatEqual(obs_var,exp_var)

    def test_variance_of_weighted_mean(self):
        """variance_of_weighted_mean calculates the variance of a weighted mean"""
        #Just a hand calculated example using the formula from here:
        #http://en.wikipedia.org/wiki/Weighted_mean
        #TODO: test if this works for arrays of variances

        #If all weights and standard deviations are equal, then
        #variance = stdev/sqrt(n)
        weights = array([0.5,0.5])
        sample_stdevs = array([4.0,4.0])
        variances = sample_stdevs**2
        exp = 4.0/sqrt(2.0)
        obs = variance_of_weighted_mean(weights,variances)
        self.assertFloatEqual(obs,exp)

        #If standard deviations are equal, but weights are not, the result
        #is equal to stdev*sqrt(sum(squared_weights))
        weights = array([0.1,0.9])
        sample_stdevs = array([4.0,4.0])
        variances = sample_stdevs**2
        exp_unbalanced = 4.0*sqrt(sum(weights**2))
        obs = variance_of_weighted_mean(weights,variances)
        self.assertEqual(obs,exp_unbalanced)

        #If all standard deviations are equal:
        #The minimal value for the variance is when all weights are equal
        #the maximal value is when one weight is 1.0 and another is 0.0
        sample_variances = array([3.0,3.0,3.0,3.0])
        balanced_weights = array([0.25,0.25,0.25,0.25])
        two_weights = array([0.0,0.50,0.50,0.0])
        unbalanced_weights = array([0.0,1.0,0.0,0.0])
        balanced_variance = variance_of_weighted_mean(balanced_weights,sample_variances)
        two_weight_variance = variance_of_weighted_mean(two_weights,sample_variances)
        unbalanced_variance = variance_of_weighted_mean(unbalanced_weights,sample_variances)
        #We expect balanced_variance < two-weight_variance < unbalanced_variance
        self.assertTrue(balanced_variance < two_weight_variance)
        self.assertTrue(balanced_variance < unbalanced_variance)
        self.assertTrue(two_weight_variance < unbalanced_variance)

        #Check that doing this for two 1D arrays is equal to using a single 2d array
        weights1 = array([0.1,0.9])
        weights2 = array([0.5,0.5])
        vars1 = array([4.0,4.0])
        vars2 = array([1000.0,1000.0])
        obs1 = variance_of_weighted_mean(weights1,vars1)
        obs2 = variance_of_weighted_mean(weights2,vars2)
        #Expect that calculating the result as a single 2D array
        #gives identical results to calculating as two 1D arrays
        exp = array([obs1,obs2])
        combined_weights = array([[0.1,0.9],[0.5,0.5]])
        combined_vars = array([[4.0,4.0],[1000.0,1000.0]])
        combined_obs = variance_of_weighted_mean(combined_weights,combined_vars)
        self.assertFloatEqual(combined_obs,exp)

    def test_normal_product_monte_carlo(self):
        """normal_product_monte_carlo calculates the confidence limits of two normal distributions empirically"""
        # Need good test data here.
        #The APPL statistical language apparently has an analytical
        # solution to the product normal that could be used
        #NOTE(review): every assertion in this method is commented out, so it
        #currently only verifies that normal_product_monte_carlo runs without
        #raising.

        #Result for product of two standard normal distributions
        lower,upper = normal_product_monte_carlo(0.0,1.0,0.0,1.0)
        #print "95% confidence limit for product of two standard normal distributions:",lower,upper
        # 1.60 corresponds to the value for the 0.10 (10%) confidence limit
        #when using a two-tailed test.
        #Therefore for the one tailed upper limit, I believe we expect 1.60 to
        #correspond to a type I error rate of 0.05
        #self.assertFloatEqual(lower,-1.60,eps=.1)
        #self.assertFloatEqual(upper,1.60,eps=.1)

        #result = normal_product_monte_carlo(1.0/3.0,1.0,2.0,1.0)
        #print result
        mean1 = 0.4
        mean2 = 1.2
        v1 = 1.0
        v2 = 1.0
        lower,upper = normal_product_monte_carlo(mean1,v1,mean2,\
          v2,confidence=0.95)
        #print "confidence limit for product of two normal distributions:",\
        #  lower,upper
        lower_estimate = mean1*mean2 + lower
        upper_estimate = mean1*mean2 + upper
        #self.assertFloatEqual(lower_estimate,-1.8801,eps=.1)
        #self.assertFloatEqual(upper_estimate,2.3774,eps=.1)

    def test_get_bounds_from_histogram(self):
        """Get bounds from histogram finds upper and lower tails of distribution at specified confidence levels"""
        #Test a simple array
        test_hist = array([0.01,0.98,0.01])
        test_bin_edges = arange(3)
        obs_lower,obs_upper = get_bounds_from_histogram(test_hist,test_bin_edges,confidence=0.90)
        #Upper and lower bounds should be conservative, and therefore exclude the center
        exp_lower = 1
        exp_upper = 2
        self.assertFloatEqual(obs_lower,exp_lower)
        self.assertFloatEqual(obs_upper,exp_upper)
        # Confirm that summing the histogram over given indices
        # gives <= confidence % of the mass
        obs_sum_lower = sum(test_hist[:obs_lower])
        self.assertTrue(obs_sum_lower <= 0.05*sum(test_hist))
        obs_sum_upper = sum(test_hist[obs_upper:])
        self.assertTrue(obs_sum_upper <= 0.05*sum(test_hist))

        #Repeat for a more complex test case
        test_hist =array([1.0,2.0,0.0,5.0,25.0,2.0,50.0,10.0,5.0,1.0])
        test_bin_edges = array(arange(len(test_hist)+1))
        obs_lower,obs_upper = get_bounds_from_histogram(test_hist,test_bin_edges,confidence=0.90)
        exp_lower = 3
        exp_upper = 9
        self.assertFloatEqual(obs_lower,exp_lower)
        self.assertFloatEqual(obs_upper,exp_upper)
        obs_sum_lower = sum(test_hist[:obs_lower])
        self.assertTrue(obs_sum_lower <= 0.05*sum(test_hist))
        obs_sum_upper = sum(test_hist[obs_upper:])
        self.assertTrue(obs_sum_upper <= 0.05*sum(test_hist))

    def test_get_brownian_motion_param_from_confidence_intervals(self):
        """Get brownian motion parameters from confidence intervals"""
        #TODO: Ensure this works with arrays of brownian motions
        tree = self.SimpleTree

        #Test one-trait case
        traits = {"A":[1.0],"C":[2.0],"E":[1.0],"F":[1.0]}
        tree = assign_traits_to_tree(traits,tree,trait_label="Reconstruction")
        tree.getNodeMatchingName('E').upper_bound = [2.0]
        tree.getNodeMatchingName('F').upper_bound = [1.0]
        tree.getNodeMatchingName('E').lower_bound = [0.0]
        tree.getNodeMatchingName('F').lower_bound = [1.0]
        brownian_motion_parameter =\
          get_brownian_motion_param_from_confidence_intervals(tree,\
          upper_bound_trait_label="upper_bound",\
          lower_bound_trait_label="lower_bound",\
          trait_label="Reconstruction",\
          confidence=0.95)
        #self.assertFloatEqual(brownian_motion_parameter,[1.0])
        #Only the length of the result is asserted; the exact expected
        #value above is commented out pending hand-verified reference data.
        self.assertEqual(len(brownian_motion_parameter),1)

        #Test two-trait case
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        result_tree = assign_traits_to_tree(traits,tree,trait_label="Reconstruction")
        true_brownian_motion_param = 5.0
        #E_histogram = thresholded_brownian_probability(1.0,\
        #  true_brownian_motion_param,d=0.01)
        #E_true_lower,E_true_upper = get_bounds_from_histogram(E_histogram,test_bin_edges,confidence=0.95)

        #set up tree with confidence intervals
        #{"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]}
        #DndParser("((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;")
        tree.getNodeMatchingName('E').upper_bound = [1.0,1.0]
        tree.getNodeMatchingName('F').upper_bound = [1.0,2.0]
        tree.getNodeMatchingName('E').lower_bound = [-2.0,-2.0]
        tree.getNodeMatchingName('F').lower_bound = [-1.0,0.0]
        brownian_motion_parameter =\
          get_brownian_motion_param_from_confidence_intervals(tree,\
          upper_bound_trait_label="upper_bound",\
          lower_bound_trait_label="lower_bound",\
          trait_label="Reconstruction",\
          confidence=0.95)
        #self.assertFloatEqual(brownian_motion_parameter,[1.0,1.0])
        self.assertEqual(len(brownian_motion_parameter),2)
def main(): f = open( '/clusterfs/ohana/external/SILVA/LTP_release_104/LTPs104_SSU_tree.newick' ) tree_string = f.read() f.close() tree = DndParser(tree_string, PhyloNode) taxon_id_of_name = {} taxon_id_of_name['Deinococcus_radiodurans__Y11332__Deinococcaceae'] = 1299 taxon_id_of_name[ 'Bacillus_subtilis_subsp._subtilis__AJ276351__Bacillaceae'] = 1423 taxon_id_of_name['Leptospira_interrogans__Z12817__Leptospiraceae'] = 173 taxon_id_of_name[ 'Mycobacterium_tuberculosis__X58890__Mycobacteriaceae'] = 1773 taxon_id_of_name[ 'Streptomyces_coelicoflavus__AB184650__Streptomycetaceae'] = 1902 taxon_id_of_name[ 'Methanocaldococcus_jannaschii___L77117__Methanocaldococcaceae'] = 2190 taxon_id_of_name[ 'Methanosarcina_acetivorans__M59137__Methanosarcinaceae'] = 2214 taxon_id_of_name['Sulfolobus_solfataricus__D26490__Sulfolobaceae'] = 2287 taxon_id_of_name['Thermotoga_maritima__M21774__Thermotogaceae'] = 2336 taxon_id_of_name[ 'Rhodopirellula_baltica__BX294149__Planctomycetaceae'] = 265606 taxon_id_of_name[ 'Thermodesulfovibrio_yellowstonii__AB231858__Nitrospiraceae'] = 289376 taxon_id_of_name['Chlamydia_trachomatis__D89067__Chlamydiaceae'] = 315277 taxon_id_of_name[ 'Chloroflexus_aurantiacus__D38365__Chloroflexaceae'] = 324602 taxon_id_of_name[ 'Geobacter_sulfurreducens__U13928__Geobacteraceae'] = 35554 taxon_id_of_name[ 'Bradyrhizobium_japonicum__U69638__Bradyrhizobiaceae'] = 375 taxon_id_of_name[ 'Pseudomonas_aeruginosa__X06684__Pseudomonadaceae'] = 381754 taxon_id_of_name[ 'Halobacterium_salinarum__AJ496185__Halobacteriaceae'] = 478009 taxon_id_of_name[ 'Dictyoglomus_turgidum__CP001251__Dictyoglomaceae'] = 515635 taxon_id_of_name['Aquifex_pyrophilus__M83548__Aquificaceae'] = 63363 taxon_id_of_name[ 'Thermococcus_kodakarensis__D38650__Thermococcaceae'] = 69014 taxon_id_of_name[ 'Fusobacterium_nucleatum_subsp._nucleatum__AE009951__Fusobacteriaceae'] = 76856 taxon_id_of_name[ 'Bacteroides_thetaiotaomicron___AE015928__Bacteroidaceae'] = 818 
taxon_id_of_name['Escherichia_coli__X80725__Enterobacteriaceae'] = 83333 node_of_taxon_id = {} for name in taxon_id_of_name: node_of_taxon_id[taxon_id_of_name[name]] = tree.getNodeMatchingName( name) max_distance = 0.0 for taxon_id1 in node_of_taxon_id: for taxon_id2 in node_of_taxon_id: if taxon_id1 < taxon_id2: distance \ = node_of_taxon_id[taxon_id1].distance(node_of_taxon_id[taxon_id2]) if distance > max_distance: max_distance = distance print "dist[%d][%d] = %g" % (taxon_id1, taxon_id2, distance) print "Maximum distance: %g" % max_distance scale = round(2.5 / max_distance) print "Scale:", scale for taxon_id1 in node_of_taxon_id: for taxon_id2 in node_of_taxon_id: if taxon_id1 < taxon_id2: threshold \ = node_of_taxon_id[taxon_id1].distance(node_of_taxon_id[taxon_id2]) threshold *= scale threshold = round(threshold * 8) threshold = 0.125 * threshold print "threshold_of_taxon_pair[%d][%d] = %g" \ % (taxon_id1, taxon_id2, threshold) print "Threholds with Eukaryotes" for taxon_id1 in [3702, 4896, 4932, 6239, 7227, 7955, 9606, 10090, 44689]: for taxon_id2 in node_of_taxon_id: if taxon_id1 < taxon_id2: print "threshold_of_taxon_pair[%d][%d] = 2.5" % (taxon_id1, taxon_id2) else: print "threshold_of_taxon_pair[%d][%d] = 2.5" % (taxon_id2, taxon_id1) for taxon_id1 in [3702, 4896, 4932, 6239, 7227, 7955, 9606, 10090, 44689]: for taxon_id2 in [1148, 33072, 374847]: if taxon_id1 < taxon_id2: print "threshold_of_taxon_pair[%d][%d] = 2.5" % (taxon_id1, taxon_id2) else: print "threshold_of_taxon_pair[%d][%d] = 2.5" % (taxon_id2, taxon_id1) print "Thresholds with more Eukaryotes" for taxon_id1 in node_of_taxon_id: for taxon_id2 in [ 10116, 9031, 81824, 7739, 7165, 6945, 665079, 6183, 5476, 5722, 5664, 5270, 5207, 5141, 4952, 45351, 451804, 36329, 35128, 184922, 145481, 13684 ]: if taxon_id1 < taxon_id2: print "threshold_of_taxon_pair[%d][%d] = 2.5" % (taxon_id1, taxon_id2) else: print "threshold_of_taxon_pair[%d][%d] = 2.5" % (taxon_id2, taxon_id1) for taxon_id1 in [1148, 
33072, 374847]: for taxon_id2 in [ 10116, 9031, 81824, 7739, 7165, 6945, 665079, 6183, 5476, 5722, 5664, 5270, 5207, 5141, 4952, 45351, 451804, 36329, 35128, 184922, 145481, 13684 ]: if taxon_id1 < taxon_id2: print "threshold_of_taxon_pair[%d][%d] = 2.5" % (taxon_id1, taxon_id2) else: print "threshold_of_taxon_pair[%d][%d] = 2.5" % (taxon_id2, taxon_id1) print "Thresholds among Eukaryotes" for taxon_id1 in [5664, 5722, 35128, 36329, 184922]: for taxon_id2 in [ 3702, 4896, 4932, 6239, 7227, 7955, 9606, 10090, 44689, 10116, 9031, 81824, 7739, 7165, 6945, 665079, 6183, 5476, 5722, 5664, 5270, 5207, 5141, 4952, 45351, 451804, 36329, 35128, 184922, 145481, 13684 ]: if taxon_id1 < taxon_id2: print "threshold_of_taxon_pair[%d][%d] = 1.5" % (taxon_id1, taxon_id2) elif taxon_id2 < taxon_id1: print "threshold_of_taxon_pair[%d][%d] = 1.5" % (taxon_id2, taxon_id1) taxon_id1 = 145481 for taxon_id2 in [ 5664, 5722, 35128, 36329, 184922, 4896, 4932, 6239, 7227, 7955, 9606, 10090, 44689, 10116, 9031, 81824, 7739, 7165, 6945, 665079, 6183, 5476, 5722, 5664, 5270, 5207, 5141, 4952, 45351, 451804, 36329, 35128, 184922, 145481, 13684 ]: if taxon_id1 < taxon_id2: print "threshold_of_taxon_pair[%d][%d] = 1.5" % (taxon_id1, taxon_id2) elif taxon_id2 < taxon_id1: print "threshold_of_taxon_pair[%d][%d] = 1.5" % (taxon_id2, taxon_id1) print "Thresholds between Fungi and non-fungi Eukaryotes" for taxon_id1 in [ 665079, 4952, 5141, 5207, 451804, 5270, 5476, 13684, 81824 ]: for taxon_id2 in [9606, 10090, 7955, 7227, 6239, 44689, 3702]: if taxon_id1 < taxon_id2: print "threshold_of_taxon_pair[%d][%d] = 1.5" % (taxon_id1, taxon_id2) else: print "threshold_of_taxon_pair[%d][%d] = 1.5" % (taxon_id2, taxon_id1)