def __call__(self, tree):
    """
    Check whether the sign split of the tip distance matrix is a valid split.

    The tips are divided according to the sign of the vector returned by
    get_vector for the tip-to-tip distance matrix.  If the resulting
    bipartition is not one of the partitions reported by
    TreeComparison.get_partitions, the tree and the partition are written
    to the counterexample file and to stdout, and the counterexample
    counter is incremented.

    @param tree: the tree to examine
    @return: always False; presumably this tells the caller to keep
    searching -- confirm against the caller
    """
    # get the partitions implied by the tree
    valid_partitions = TreeComparison.get_partitions(tree)
    # Get the partition implied by the Fiedler split
    # of the graph derived from the tree.
    tip_nodes = list(tree.gen_tips())
    D = tree.get_partial_distance_matrix([id(node) for node in tip_nodes])
    y = get_vector(D).tolist()
    name_selection = frozenset(
            node.get_name() for node, elem in zip(tip_nodes, y) if elem > 0)
    name_complement = frozenset(
            node.get_name() for node, elem in zip(tip_nodes, y) if elem <= 0)
    name_partition = frozenset((name_selection, name_complement))
    if name_partition not in valid_partitions:
        # The partition is a frozenset rather than a string, so it must be
        # converted explicitly; str.join rejects non-string items.
        msg = '\n'.join([
            'invalid partition found:',
            'tree:',
            NewickIO.get_newick_string(tree),
            'invalid partition:',
            str(name_partition),
            ])
        # the counterexample file is opened lazily on the first hit
        if not self.fout:
            self.fout = open(self.counterexample_filename, 'wt')
        print >> self.fout, msg
        print(msg)
        self.ncounterexamples += 1
    # do not stop looking, even if a counterexample is found
    return False
def __call__(self, tree):
    """
    Check whether the sign split of the tip distance matrix is a valid split.

    The tips are divided according to the sign of the vector returned by
    get_vector for the tip-to-tip distance matrix.  If the resulting
    bipartition is not one of the partitions reported by
    TreeComparison.get_partitions, the tree and the partition are written
    to the counterexample file and to stdout, and the counterexample
    counter is incremented.

    @param tree: the tree to examine
    @return: always False; presumably this tells the caller to keep
    searching -- confirm against the caller
    """
    # get the partitions implied by the tree
    valid_partitions = TreeComparison.get_partitions(tree)
    # Get the partition implied by the Fiedler split
    # of the graph derived from the tree.
    tip_nodes = list(tree.gen_tips())
    D = tree.get_partial_distance_matrix([id(node) for node in tip_nodes])
    y = get_vector(D).tolist()
    name_selection = frozenset(
            node.get_name() for node, elem in zip(tip_nodes, y) if elem > 0)
    name_complement = frozenset(
            node.get_name() for node, elem in zip(tip_nodes, y) if elem <= 0)
    name_partition = frozenset((name_selection, name_complement))
    if name_partition not in valid_partitions:
        # The partition is a frozenset rather than a string, so it must be
        # converted explicitly; str.join rejects non-string items.
        msg = '\n'.join([
            'invalid partition found:',
            'tree:',
            NewickIO.get_newick_string(tree),
            'invalid partition:',
            str(name_partition),
            ])
        # the counterexample file is opened lazily on the first hit
        if not self.fout:
            self.fout = open(self.counterexample_filename, 'wt')
        print >> self.fout, msg
        print(msg)
        self.ncounterexamples += 1
    # do not stop looking, even if a counterexample is found
    return False
def get_response_content(fs):
    """
    Test the sign split of each submitted tree using D and using D*D.

    Every nonblank line of fs.trees is parsed as a newick tree and
    validated.  For each tree the tips are split by the sign of the
    loadings from get_principal_coordinate, first for the tip distance
    matrix D and then for its elementwise square.  A split that is not
    among the partitions of the tree is reported as an error in the first
    case and as a counterexample in the second.

    @param fs: an object whose 'trees' attribute is the multi-line input
    @return: the text of the report
    @raise HandlingError: if a tree has fewer than four tips, has an
    unlabeled tip, or has a repeated tip label
    """

    def read_tree(tree_string):
        # parse one tree and make sure it conforms to the requirements
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError(
                    'expected at least four tips but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        return tree

    def sign_split(names, loadings):
        # pair each name with its loading and split on the sign
        pairs = list(zip(names, loadings))
        nonneg_names = frozenset(name for name, v in pairs if v >= 0)
        neg_names = frozenset(name for name, v in pairs if v < 0)
        return frozenset([nonneg_names, neg_names])

    def report(headline, tree, part):
        # write one paragraph describing an invalid partition
        print >> out, headline
        print >> out, 'tree:', NewickIO.get_newick_string(tree)
        print >> out, 'invalid partition:', partition_to_string(part)
        print >> out

    # all trees are read and validated before any of them is analyzed
    trees = [read_tree(s)
            for s in iterutils.stripped_lines(StringIO(fs.trees))]
    out = StringIO()
    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        valid_parts = TreeComparison.get_partitions(tree)
        names = [tip.get_name() for tip in tree.gen_tips()]
        D = np.array(tree.get_distance_matrix(names))
        # the partition implied by the correct formula should be valid
        part = sign_split(names, get_principal_coordinate(D))
        if part not in valid_parts:
            nerrors += 1
            report(
                    'error: a partition that was supposed to be valid '
                    'was found to be invalid',
                    tree, part)
        # the incorrect formula uses elementwise squared distances
        part = sign_split(names, get_principal_coordinate(D * D))
        if part not in valid_parts:
            ncounterexamples += 1
            report('found a counterexample!', tree, part)
    # finish with the summary counts
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
def main():
    """
    Search random trees for a split that becomes invalid under squaring.

    Random trees are sampled until the user interrupts with control-c.
    For each tree the tips are split by the sign of the loadings from
    get_principal_coordinate, first for the tip distance matrix D and then
    for its elementwise square.  Splits that are not among the partitions
    of the tree are written to 'counterexamples.out'.  On interrupt a
    summary is printed to stdout, and the output file is always closed.
    """
    filename = 'counterexamples.out'
    fout = open(filename, 'wt')

    def sign_split(names, loadings):
        # pair each name with its loading and split on the sign
        pairs = list(zip(names, loadings))
        nonneg_names = frozenset(name for name, v in pairs if v >= 0)
        neg_names = frozenset(name for name, v in pairs if v < 0)
        return frozenset([nonneg_names, neg_names])

    def report(headline, tree, part):
        # write one paragraph describing an invalid partition
        print >> fout, headline
        print >> fout, 'tree:', NewickIO.get_newick_string(tree)
        print >> fout, 'invalid partition:', partition_to_string(part)
        print >> fout

    # The counters are set before the interruptible region so that the
    # summary can always be printed, however early control-c arrives.
    count = 0
    ncounterexamples = 0
    nerrors = 0
    # parameters of the random tree sampler
    n_base_leaves = 4
    n_expected_extra_leaves = 1
    expected_branch_length = 1
    try:
        print('Does monotonically transforming the pairwise leaf distances '
                'affect the compatibility')
        print('of the split found using principal coordinate analysis?')
        print('I am looking through random trees for a tree '
                'that is split incompatibly')
        print('when distances are squared.')
        print('Use control-c to stop the program when you get bored.')
        try:
            while True:
                count += 1
                # get a random tree
                tree = TreeSampler.sample_tree(
                        n_base_leaves, n_expected_extra_leaves,
                        expected_branch_length)
                # get the set of valid partitions implied by the tree
                valid_parts = TreeComparison.get_partitions(tree)
                names = [tip.get_name() for tip in tree.gen_tips()]
                D = np.array(tree.get_distance_matrix(names))
                # the partition implied by the correct formula
                # should be valid
                part = sign_split(names, get_principal_coordinate(D))
                if part not in valid_parts:
                    nerrors += 1
                    report(
                            'error: a partition that was supposed to be '
                            'valid was found to be invalid',
                            tree, part)
                # the incorrect formula uses elementwise squared distances
                part = sign_split(names, get_principal_coordinate(D * D))
                if part not in valid_parts:
                    ncounterexamples += 1
                    report('found a counterexample!', tree, part)
        except KeyboardInterrupt:
            print('trees examined: %d' % count)
            print('errors: %d' % nerrors)
            print('counterexamples: %d' % ncounterexamples)
    finally:
        # make sure buffered counterexamples reach the disk
        fout.close()
def get_response_content(fs):
    """
    Test the sign split of each submitted tree using D and using D*D.

    Every nonblank line of fs.trees is parsed as a newick tree and
    validated.  For each tree the tips are split by the sign of the
    loadings from get_principal_coordinate, first for the tip distance
    matrix D and then for its elementwise square.  A split that is not
    among the partitions of the tree is reported as an error in the first
    case and as a counterexample in the second.

    @param fs: an object whose 'trees' attribute is the multi-line input
    @return: the text of the report
    @raise HandlingError: if a tree has fewer than four tips, has an
    unlabeled tip, or has a repeated tip label
    """

    def read_tree(tree_string):
        # parse one tree and make sure it conforms to the requirements
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError(
                    'expected at least four tips but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        return tree

    def sign_split(names, loadings):
        # pair each name with its loading and split on the sign
        pairs = list(zip(names, loadings))
        nonneg_names = frozenset(name for name, v in pairs if v >= 0)
        neg_names = frozenset(name for name, v in pairs if v < 0)
        return frozenset([nonneg_names, neg_names])

    def report(headline, tree, part):
        # write one paragraph describing an invalid partition
        print >> out, headline
        print >> out, 'tree:', NewickIO.get_newick_string(tree)
        print >> out, 'invalid partition:', partition_to_string(part)
        print >> out

    # all trees are read and validated before any of them is analyzed
    trees = [read_tree(s)
            for s in iterutils.stripped_lines(StringIO(fs.trees))]
    out = StringIO()
    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        valid_parts = TreeComparison.get_partitions(tree)
        names = [tip.get_name() for tip in tree.gen_tips()]
        D = np.array(tree.get_distance_matrix(names))
        # the partition implied by the correct formula should be valid
        part = sign_split(names, get_principal_coordinate(D))
        if part not in valid_parts:
            nerrors += 1
            report(
                    'error: a partition that was supposed to be valid '
                    'was found to be invalid',
                    tree, part)
        # the incorrect formula uses elementwise squared distances
        part = sign_split(names, get_principal_coordinate(D * D))
        if part not in valid_parts:
            ncounterexamples += 1
            report('found a counterexample!', tree, part)
    # finish with the summary counts
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
def main():
    """
    Search random trees for a split that becomes invalid under squaring.

    Random trees are sampled until the user interrupts with control-c.
    For each tree the tips are split by the sign of the loadings from
    get_principal_coordinate, first for the tip distance matrix D and then
    for its elementwise square.  Splits that are not among the partitions
    of the tree are written to 'counterexamples.out'.  On interrupt a
    summary is printed to stdout, and the output file is always closed.
    """
    filename = 'counterexamples.out'
    fout = open(filename, 'wt')

    def sign_split(names, loadings):
        # pair each name with its loading and split on the sign
        pairs = list(zip(names, loadings))
        nonneg_names = frozenset(name for name, v in pairs if v >= 0)
        neg_names = frozenset(name for name, v in pairs if v < 0)
        return frozenset([nonneg_names, neg_names])

    def report(headline, tree, part):
        # write one paragraph describing an invalid partition
        print >> fout, headline
        print >> fout, 'tree:', NewickIO.get_newick_string(tree)
        print >> fout, 'invalid partition:', partition_to_string(part)
        print >> fout

    # The counters are set before the interruptible region so that the
    # summary can always be printed, however early control-c arrives.
    count = 0
    ncounterexamples = 0
    nerrors = 0
    # parameters of the random tree sampler
    n_base_leaves = 4
    n_expected_extra_leaves = 1
    expected_branch_length = 1
    try:
        print('Does monotonically transforming the pairwise leaf distances '
                'affect the compatibility')
        print('of the split found using principal coordinate analysis?')
        print('I am looking through random trees for a tree '
                'that is split incompatibly')
        print('when distances are squared.')
        print('Use control-c to stop the program when you get bored.')
        try:
            while True:
                count += 1
                # get a random tree
                tree = TreeSampler.sample_tree(
                        n_base_leaves, n_expected_extra_leaves,
                        expected_branch_length)
                # get the set of valid partitions implied by the tree
                valid_parts = TreeComparison.get_partitions(tree)
                names = [tip.get_name() for tip in tree.gen_tips()]
                D = np.array(tree.get_distance_matrix(names))
                # the partition implied by the correct formula
                # should be valid
                part = sign_split(names, get_principal_coordinate(D))
                if part not in valid_parts:
                    nerrors += 1
                    report(
                            'error: a partition that was supposed to be '
                            'valid was found to be invalid',
                            tree, part)
                # the incorrect formula uses elementwise squared distances
                part = sign_split(names, get_principal_coordinate(D * D))
                if part not in valid_parts:
                    ncounterexamples += 1
                    report('found a counterexample!', tree, part)
        except KeyboardInterrupt:
            print('trees examined: %d' % count)
            print('errors: %d' % nerrors)
            print('counterexamples: %d' % ncounterexamples)
    finally:
        # make sure buffered counterexamples reach the disk
        fout.close()
def get_response_content(fs):
    """
    Compare tip splits from the augmented and not-augmented distance matrices.

    Every nonblank line of fs.trees is parsed as a newick tree and
    validated.  For each tree the tips are split by the sign of the
    get_vector output, once using the distance matrix over all nodes
    (tips first, then internal nodes) and once using the matrix over the
    tips only.  A paragraph is written for a tree only when one of the
    splits is not a partition of the tree or when the two splits differ.
    The report ends with the counts of agreeing and disagreeing trees.

    @param fs: an object whose 'trees' attribute is the multi-line input
    @return: the text of the report
    @raise HandlingError: if a tree has fewer than four tips, has an
    unlabeled tip, or has a repeated tip label
    """

    def read_tree(tree_string):
        # parse one tree and make sure it conforms to the requirements
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError(
                    'expected at least four tips '
                    'but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        return tree

    def sign_split(nodes, values):
        # split the node names on the sign of the paired value
        pairs = list(zip(nodes, values))
        positive = frozenset(
                node.get_name() for node, v in pairs if v > 0)
        nonpositive = frozenset(
                node.get_name() for node, v in pairs if v <= 0)
        return frozenset((positive, nonpositive))

    # all trees are read and validated before any of them is analyzed
    trees = [read_tree(s)
            for s in iterutils.stripped_lines(fs.trees.splitlines())]
    out = StringIO()
    same_count = 0
    diff_count = 0
    for tree in trees:
        # this paragraph is shown only if something notable happens
        paragraph = StringIO()
        has_event = False
        print >> paragraph, NewickIO.get_newick_string(tree)
        # separate the tips from the internal nodes, keeping preorder
        tip_nodes = []
        internal_nodes = []
        for node in tree.preorder():
            (tip_nodes if node.is_tip() else internal_nodes).append(node)
        ntips = len(tip_nodes)
        valid_partitions = TreeComparison.get_partitions(tree)
        # split using the augmented matrix; tips occupy the leading entries
        all_ids = [id(node) for node in tip_nodes + internal_nodes]
        D_full = tree.get_partial_distance_matrix(all_ids)
        y_full = get_vector(D_full).tolist()
        name_partition_a = sign_split(tip_nodes, y_full[:ntips])
        if name_partition_a not in valid_partitions:
            print >> paragraph, (
                    'augmented distance matrix split fail:'), name_partition_a
            has_event = True
        # split using the matrix restricted to the tips
        tip_ids = [id(node) for node in tip_nodes]
        D = tree.get_partial_distance_matrix(tip_ids)
        name_partition_b = sign_split(tip_nodes, get_vector(D).tolist())
        if name_partition_b not in valid_partitions:
            print >> paragraph, (
                    'not-augmented distance matrix split fail:'
                    ), name_partition_b
            has_event = True
        # compare the two name partitions
        if name_partition_a != name_partition_b:
            diff_count += 1
            print >> paragraph, 'this tree was split differently '
            print >> paragraph, 'by the different methods:'
            print >> paragraph, (
                    'augmented distance matrix split:'), name_partition_a
            print >> paragraph, (
                    'not-augmented distance matrix split:'), name_partition_b
            has_event = True
        else:
            same_count += 1
        # the extra newline from print separates the paragraphs
        if has_event:
            print >> out, paragraph.getvalue()
    # write the summary
    print >> out, (
            'for this many trees the same split was found:'), same_count
    print >> out, (
            'for this many trees different splits were found:'), diff_count
    return out.getvalue()
def get_response_content(fs):
    """
    Compare tip splits from the augmented and not-augmented distance matrices.

    Every nonblank line of fs.trees is parsed as a newick tree and
    validated.  For each tree the tips are split by the sign of the
    get_vector output, once using the distance matrix over all nodes
    (tips first, then internal nodes) and once using the matrix over the
    tips only.  A paragraph is written for a tree only when one of the
    splits is not a partition of the tree or when the two splits differ.
    The report ends with the counts of agreeing and disagreeing trees.

    @param fs: an object whose 'trees' attribute is the multi-line input
    @return: the text of the report
    @raise HandlingError: if a tree has fewer than four tips, has an
    unlabeled tip, or has a repeated tip label
    """

    def read_tree(tree_string):
        # parse one tree and make sure it conforms to the requirements
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError(
                    'expected at least four tips '
                    'but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        return tree

    def sign_split(nodes, values):
        # split the node names on the sign of the paired value
        pairs = list(zip(nodes, values))
        positive = frozenset(
                node.get_name() for node, v in pairs if v > 0)
        nonpositive = frozenset(
                node.get_name() for node, v in pairs if v <= 0)
        return frozenset((positive, nonpositive))

    # all trees are read and validated before any of them is analyzed
    trees = [read_tree(s)
            for s in iterutils.stripped_lines(fs.trees.splitlines())]
    out = StringIO()
    same_count = 0
    diff_count = 0
    for tree in trees:
        # this paragraph is shown only if something notable happens
        paragraph = StringIO()
        has_event = False
        print >> paragraph, NewickIO.get_newick_string(tree)
        # separate the tips from the internal nodes, keeping preorder
        tip_nodes = []
        internal_nodes = []
        for node in tree.preorder():
            (tip_nodes if node.is_tip() else internal_nodes).append(node)
        ntips = len(tip_nodes)
        valid_partitions = TreeComparison.get_partitions(tree)
        # split using the augmented matrix; tips occupy the leading entries
        all_ids = [id(node) for node in tip_nodes + internal_nodes]
        D_full = tree.get_partial_distance_matrix(all_ids)
        y_full = get_vector(D_full).tolist()
        name_partition_a = sign_split(tip_nodes, y_full[:ntips])
        if name_partition_a not in valid_partitions:
            print >> paragraph, (
                    'augmented distance matrix split fail:'), name_partition_a
            has_event = True
        # split using the matrix restricted to the tips
        tip_ids = [id(node) for node in tip_nodes]
        D = tree.get_partial_distance_matrix(tip_ids)
        name_partition_b = sign_split(tip_nodes, get_vector(D).tolist())
        if name_partition_b not in valid_partitions:
            print >> paragraph, (
                    'not-augmented distance matrix split fail:'
                    ), name_partition_b
            has_event = True
        # compare the two name partitions
        if name_partition_a != name_partition_b:
            diff_count += 1
            print >> paragraph, 'this tree was split differently '
            print >> paragraph, 'by the different methods:'
            print >> paragraph, (
                    'augmented distance matrix split:'), name_partition_a
            print >> paragraph, (
                    'not-augmented distance matrix split:'), name_partition_b
            has_event = True
        else:
            same_count += 1
        # the extra newline from print separates the paragraphs
        if has_event:
            print >> out, paragraph.getvalue()
    # write the summary
    print >> out, (
            'for this many trees the same split was found:'), same_count
    print >> out, (
            'for this many trees different splits were found:'), diff_count
    return out.getvalue()