Exemplo n.º 1
0
 def __call__(self, tree):
     # get the partitions implied by the tree
     valid_partitions = TreeComparison.get_partitions(tree)
     # Get the partition implied by the Fiedler split
     # of the graph derived from the tree.
     tip_nodes = list(tree.gen_tips())
     D = tree.get_partial_distance_matrix(
             [id(node) for node in tip_nodes])
     y = get_vector(D).tolist()
     name_selection = frozenset(node.get_name()
             for node, elem in zip(tip_nodes, y) if elem > 0)
     name_complement = frozenset(node.get_name()
             for node, elem in zip(tip_nodes, y) if elem <= 0)
     name_partition = frozenset((name_selection, name_complement))
     if name_partition not in valid_partitions:
         msg = '\n'.join([
             'invalid partition found:',
             'tree:', NewickIO.get_newick_string(tree),
             'invalid partition:', name_partition])
         if not self.fout:
             self.fout = open(self.counterexample_filename, 'wt')
         print >> self.fout, msg
         print msg
         self.ncounterexamples += 1
     # do not stop looking, even if a counterexample is found
     return False
Exemplo n.º 2
0
 def __call__(self, tree):
     # get the partitions implied by the tree
     valid_partitions = TreeComparison.get_partitions(tree)
     # Get the partition implied by the Fiedler split
     # of the graph derived from the tree.
     tip_nodes = list(tree.gen_tips())
     D = tree.get_partial_distance_matrix([id(node) for node in tip_nodes])
     y = get_vector(D).tolist()
     name_selection = frozenset(node.get_name()
                                for node, elem in zip(tip_nodes, y)
                                if elem > 0)
     name_complement = frozenset(node.get_name()
                                 for node, elem in zip(tip_nodes, y)
                                 if elem <= 0)
     name_partition = frozenset((name_selection, name_complement))
     if name_partition not in valid_partitions:
         msg = '\n'.join([
             'invalid partition found:', 'tree:',
             NewickIO.get_newick_string(tree), 'invalid partition:',
             name_partition
         ])
         if not self.fout:
             self.fout = open(self.counterexample_filename, 'wt')
         print >> self.fout, msg
         print msg
         self.ncounterexamples += 1
     # do not stop looking, even if a counterexample is found
     return False
Exemplo n.º 3
0
def get_response_content(fs):
    """
    Report trees whose principal coordinate sign split is not a tree split.

    Each tree is checked twice: once using its tip distance matrix D,
    and once using the elementwise square D * D.  An invalid split from D
    is reported as an error, whereas an invalid split from D * D is
    reported as a counterexample.

    @param fs: an object whose 'trees' attribute holds the newick strings
    @return: the text of the report
    @raise HandlingError: if a tree has fewer than four tips, has a tip
        without a name, or has two tips with the same name
    """
    # parse and validate every tree before any of the analysis begins
    trees = []
    for line in iterutils.stripped_lines(StringIO(fs.trees)):
        parsed = NewickIO.parse(line, FelTree.NewickTree)
        names = [tip.get_name() for tip in parsed.gen_tips()]
        ntips = len(names)
        if ntips < 4:
            raise HandlingError('expected at least four tips but found ' +
                                str(ntips))
        if any(name is None for name in names):
            raise HandlingError('each terminal node must be labeled')
        if ntips != len(set(names)):
            raise HandlingError('each terminal node label must be unique')
        trees.append(parsed)
    out = StringIO()

    def sign_split(names, matrix):
        # split the names by the sign of the corresponding loading
        loadings = get_principal_coordinate(matrix)
        nonneg = frozenset(
            name for name, x in zip(names, loadings) if x >= 0)
        neg = frozenset(
            name for name, x in zip(names, loadings) if x < 0)
        return frozenset([nonneg, neg])

    def report(headline, bad_tree, bad_part):
        # write one paragraph that describes an invalid partition
        print >> out, headline
        print >> out, 'tree:', NewickIO.get_newick_string(bad_tree)
        print >> out, 'invalid partition:', partition_to_string(bad_part)
        print >> out

    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        valid_parts = TreeComparison.get_partitions(tree)
        ordered_names = [tip.get_name() for tip in tree.gen_tips()]
        D = np.array(tree.get_distance_matrix(ordered_names))
        # the split from the plain distances is supposed to be valid
        part = sign_split(ordered_names, D)
        if part not in valid_parts:
            nerrors += 1
            report(
                'error: a partition that was supposed to be valid was found to be invalid',
                tree, part)
        # the split from the squared distances might not be valid
        part = sign_split(ordered_names, D * D)
        if part not in valid_parts:
            ncounterexamples += 1
            report('found a counterexample!', tree, part)
    # finish with the summary
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
Exemplo n.º 4
0
def main():
    filename = 'counterexamples.out'
    fout = open(filename, 'wt')
    print 'Does monotonically transforming the pairwise leaf distances affect the compatibility'
    print 'of the split found using principal coordinate analysis?'
    print 'I am looking through random trees for a tree that is split incompatibly'
    print 'when distances are squared.'
    print 'Use control-c to stop the program when you get bored.'
    try:
        count = 0
        ncounterexamples = 0
        nerrors = 0
        while True:
            count += 1
            # get a random tree
            n_base_leaves = 4
            n_expected_extra_leaves = 1
            expected_branch_length = 1
            tree = TreeSampler.sample_tree(n_base_leaves,
                                           n_expected_extra_leaves,
                                           expected_branch_length)
            # get the set of valid partitions implied by the tree
            valid_parts = TreeComparison.get_partitions(tree)
            ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()]
            # assert that the partition implied by the correct formula is valid
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            loadings = get_principal_coordinate(D)
            nonneg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                nerrors += 1
                print >> fout, 'error: a partition that was supposed to be valid was found to be invalid'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
            # check the validity of the partition implied by the incorrect formula
            Q = D * D
            loadings = get_principal_coordinate(Q)
            nonneg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                ncounterexamples += 1
                print >> fout, 'found a counterexample!'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
    except KeyboardInterrupt, e:
        print 'trees examined:', count
        print 'errors:', nerrors
        print 'counterexamples:', ncounterexamples
Exemplo n.º 5
0
def get_response_content(fs):
    """
    Check the principal coordinate sign split of each submitted tree.

    For every tree the tip names are split by the sign of the vector that
    get_principal_coordinate() returns, first for the tip distance matrix
    and then for its elementwise square.  A split that is not one of the
    partitions returned by TreeComparison.get_partitions() is counted and
    described in the response, as an error for the plain distances and as
    a counterexample for the squared distances.

    @param fs: an object whose 'trees' attribute holds the newick strings
    @return: the response text
    @raise HandlingError: if a tree has fewer than four tips, or if its
        tip names are not all present and unique
    """

    def split_by_sign(labels, values):
        # non-negative values go on one side and negative values on the other
        side_a = frozenset(
                label for label, value in zip(labels, values) if value >= 0)
        side_b = frozenset(
                label for label, value in zip(labels, values) if value < 0)
        return frozenset([side_a, side_b])

    # read the newick trees and reject the ones that cannot be analyzed
    trees = []
    for newick in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(newick, FelTree.NewickTree)
        labels = [tip.get_name() for tip in tree.gen_tips()]
        if len(labels) < 4:
            msg = 'expected at least four tips but found ' + str(len(labels))
            raise HandlingError(msg)
        if any(label is None for label in labels):
            raise HandlingError('each terminal node must be labeled')
        if len(set(labels)) != len(labels):
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # analyze the trees one at a time
    out = StringIO()
    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        valid_parts = TreeComparison.get_partitions(tree)
        labels = [tip.get_name() for tip in tree.gen_tips()]
        D = np.array(tree.get_distance_matrix(labels))
        # plain distances: this split is expected to be valid
        part = split_by_sign(labels, get_principal_coordinate(D))
        if part not in valid_parts:
            nerrors += 1
            print >> out, 'error: a partition that was supposed to be valid was found to be invalid'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
        # squared distances: this split might be invalid
        part = split_by_sign(labels, get_principal_coordinate(D * D))
        if part not in valid_parts:
            ncounterexamples += 1
            print >> out, 'found a counterexample!'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
    # append the totals and return the whole response
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
Exemplo n.º 6
0
def main():
    filename = 'counterexamples.out'
    fout = open(filename, 'wt')
    print 'Does monotonically transforming the pairwise leaf distances affect the compatibility'
    print 'of the split found using principal coordinate analysis?'
    print 'I am looking through random trees for a tree that is split incompatibly'
    print 'when distances are squared.'
    print 'Use control-c to stop the program when you get bored.'
    try:
        count = 0
        ncounterexamples = 0
        nerrors = 0
        while True:
            count += 1
            # get a random tree
            n_base_leaves = 4
            n_expected_extra_leaves = 1
            expected_branch_length = 1
            tree = TreeSampler.sample_tree(n_base_leaves, n_expected_extra_leaves, expected_branch_length)
            # get the set of valid partitions implied by the tree
            valid_parts = TreeComparison.get_partitions(tree)
            ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()]
            # assert that the partition implied by the correct formula is valid
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            loadings = get_principal_coordinate(D)
            nonneg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                nerrors += 1
                print >> fout, 'error: a partition that was supposed to be valid was found to be invalid'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
            # check the validity of the partition implied by the incorrect formula
            Q = D * D
            loadings = get_principal_coordinate(Q)
            nonneg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                ncounterexamples += 1
                print >> fout, 'found a counterexample!'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
    except KeyboardInterrupt, e:
        print 'trees examined:', count
        print 'errors:', nerrors
        print 'counterexamples:', ncounterexamples
Exemplo n.º 7
0
def get_response_content(fs):
    """
    Compare the splits from the augmented and the tips-only distance matrix.

    For each tree the tip names are split by the sign of the entries of
    get_vector(), once using the distance matrix over all nodes (tips
    first, then internal nodes) and once using the matrix over the tips
    alone.  A paragraph is written for a tree only when one of the splits
    is not among the partitions returned by
    TreeComparison.get_partitions(), or when the two splits differ.
    The response ends with a count of the trees whose two splits agree
    and a count of the trees whose two splits differ.

    @param fs: an object whose 'trees' attribute holds the newick strings
    @return: the response text
    @raise HandlingError: if a tree has fewer than four tips, has a tip
        without a name, or has two tips with the same name
    """
    # read and validate the newick trees
    trees = []
    for line in iterutils.stripped_lines(fs.trees.splitlines()):
        parsed = NewickIO.parse(line, FelTree.NewickTree)
        names = [tip.get_name() for tip in parsed.gen_tips()]
        ntips = len(names)
        if ntips < 4:
            raise HandlingError('expected at least four tips '
                                'but found ' + str(ntips))
        if any(name is None for name in names):
            raise HandlingError('each terminal node must be labeled')
        if ntips != len(set(names)):
            raise HandlingError('each terminal node label must be unique')
        trees.append(parsed)

    def sign_partition(nodes, values):
        # strictly positive entries on one side, everything else on the other
        positive = frozenset(
            node.get_name() for node, x in zip(nodes, values) if x > 0)
        nonpositive = frozenset(
            node.get_name() for node, x in zip(nodes, values) if x <= 0)
        return frozenset((positive, nonpositive))

    out = StringIO()
    same_count = 0
    diff_count = 0
    for tree in trees:
        # this paragraph is shown only if something notable happens
        para = StringIO()
        has_event = False
        print >> para, NewickIO.get_newick_string(tree)
        # separate the tips from the internal nodes in a single pass
        tips = []
        internals = []
        for node in tree.preorder():
            (tips if node.is_tip() else internals).append(node)
        everything = tips + internals
        valid_partitions = TreeComparison.get_partitions(tree)
        # split using the matrix over all nodes; the tips come first,
        # so the leading entries of the vector belong to the tips
        D_full = tree.get_partial_distance_matrix(
            [id(node) for node in everything])
        y_full = get_vector(D_full).tolist()
        split_a = sign_partition(tips, y_full[:len(tips)])
        if split_a not in valid_partitions:
            print >> para, 'augmented distance matrix split fail:', split_a
            has_event = True
        # split using the matrix over the tips alone
        D = tree.get_partial_distance_matrix([id(node) for node in tips])
        split_b = sign_partition(tips, get_vector(D).tolist())
        if split_b not in valid_partitions:
            print >> para, 'not-augmented distance matrix split fail:', split_b
            has_event = True
        # tally whether or not the two methods agree
        if split_a != split_b:
            diff_count += 1
            print >> para, 'this tree was split differently '
            print >> para, 'by the different methods:'
            print >> para, 'augmented distance matrix split:', split_a
            print >> para, 'not-augmented distance matrix split:', split_b
            has_event = True
        else:
            same_count += 1
        # the extra newline from print separates the paragraphs
        if has_event:
            print >> out, para.getvalue()
    # write the summary
    print >> out, 'for this many trees the same split was found:', same_count
    print >> out, 'for this many trees different splits were found:', diff_count
    return out.getvalue()
Exemplo n.º 8
0
def get_response_content(fs):
    """
    Report trees that are split differently by two distance matrices.

    Two sign splits of the tip names are computed for every tree, both
    from the vector returned by get_vector(): one from the distance
    matrix over the tips followed by the internal nodes, and one from the
    distance matrix over the tips only.  A tree gets a paragraph in the
    response if either split is missing from the partitions returned by
    TreeComparison.get_partitions(), or if the two splits are not equal.
    The last lines of the response count the agreements and the
    disagreements.

    @param fs: an object whose 'trees' attribute holds the newick strings
    @return: the response text
    @raise HandlingError: if a tree has fewer than four tips, or if its
        tip names are not all present and unique
    """

    def sign_split(tree, matrix_nodes, tips):
        # Compute the vector for the requested nodes and split the tips
        # by its sign.  Pairing with zip() stops after the last tip,
        # so entries that belong to internal nodes are never looked at.
        D = tree.get_partial_distance_matrix(
                [id(node) for node in matrix_nodes])
        pairs = list(zip(tips, get_vector(D).tolist()))
        selection = frozenset(
                node.get_name() for node, elem in pairs if elem > 0)
        complement = frozenset(
                node.get_name() for node, elem in pairs if elem <= 0)
        return frozenset((selection, complement))

    # get the newick trees and check their tip labels
    trees = []
    for newick in iterutils.stripped_lines(fs.trees.splitlines()):
        tree = NewickIO.parse(newick, FelTree.NewickTree)
        labels = [tip.get_name() for tip in tree.gen_tips()]
        if len(labels) < 4:
            msg = 'expected at least four tips ' 'but found ' + str(len(labels))
            raise HandlingError(msg)
        if any(label is None for label in labels):
            raise HandlingError('each terminal node must be labeled')
        if len(set(labels)) != len(labels):
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # build the response one tree at a time
    out = StringIO()
    same_count = 0
    diff_count = 0
    for tree in trees:
        # collect the text for this tree separately,
        # because it is shown only if there is an event
        local_out = StringIO()
        has_event = False
        print >> local_out, NewickIO.get_newick_string(tree)
        # list the tips before the internal nodes
        tip_nodes = []
        internal_nodes = []
        for node in tree.preorder():
            bucket = tip_nodes if node.is_tip() else internal_nodes
            bucket.append(node)
        all_nodes = tip_nodes + internal_nodes
        valid_partitions = TreeComparison.get_partitions(tree)
        # the split from the augmented distance matrix
        name_partition_a = sign_split(tree, all_nodes, tip_nodes)
        if name_partition_a not in valid_partitions:
            print >> local_out, 'augmented distance matrix split fail:',
            print >> local_out, name_partition_a
            has_event = True
        # the split from the not-augmented distance matrix
        name_partition_b = sign_split(tree, tip_nodes, tip_nodes)
        if name_partition_b not in valid_partitions:
            print >> local_out, 'not-augmented distance matrix split fail:',
            print >> local_out, name_partition_b
            has_event = True
        # compare the two splits
        if name_partition_a == name_partition_b:
            same_count += 1
        else:
            diff_count += 1
            has_event = True
            print >> local_out, 'this tree was split differently '
            print >> local_out, 'by the different methods:'
            print >> local_out, 'augmented distance matrix split:',
            print >> local_out, name_partition_a
            print >> local_out, 'not-augmented distance matrix split:',
            print >> local_out, name_partition_b
        # printing the paragraph adds a newline between trees
        if has_event:
            print >> out, local_out.getvalue()
    # finish with the two totals
    print >> out, 'for this many trees the same split was found:',
    print >> out, same_count
    print >> out, 'for this many trees different splits were found:',
    print >> out, diff_count
    return out.getvalue()