Exemplo n.º 1
0
def get_stone_weights(tree):
    """
    Compute tip weights using the method proposed by Stone and Sidow.

    Each non-root node of the tree is used once: in a deep copy of the tree
    a new root is inserted on the branch above that node (at fraction .5)
    and the copy is rerooted there.  The tip weights that
    get_thompson_weights assigns to the rerooted copy are scaled by the
    length of the branch and accumulated per tip.  The accumulated values
    are finally divided by their grand total.
    As a side effect an 'id' attribute is set on every node of the input tree.
    @param tree: a tree object with branch lengths for all non-root nodes
    @return: a sequence of (name, weight) pairs
    """
    # tag every node so that a node can be matched with its deep copy
    for serial, tagged in enumerate(tree.preorder()):
        tagged.id = serial
    # accumulate the contribution of every rooting of the tree
    totals = {}
    for branch_node in tree.gen_non_root_nodes():
        rerooted = copy.deepcopy(tree)
        # find the copy of the node below the branch that gets the new root
        matches = [x for x in rerooted.preorder() if x.id == branch_node.id]
        assert len(matches) == 1
        twin = matches[0]
        # insert the new root on the branch and reroot the copy there
        pivot = Newick.NewickNode()
        rerooted.insert_node(pivot, twin.parent, twin, .5)
        rerooted.reroot(pivot)
        # this call stores the weight of each tip in its 'current' attribute
        get_thompson_weights(rerooted)
        # weight this rooting by the length of the branch that was split
        for tip in rerooted.gen_tips():
            so_far = totals.get(tip.id, 0)
            totals[tip.id] = so_far + branch_node.blen * tip.current
    # normalize by the grand total and report the weights by tip name
    normalizer = sum(totals.values())
    pairs = []
    for tip in tree.gen_tips():
        pairs.append((tip.name, totals[tip.id] / normalizer))
    return pairs
Exemplo n.º 2
0
def get_response_content(fs):
    """
    Repeatedly split the long branches of a tree.

    The maximum allowed branch length is the total branch length of the tree
    divided by the requested minimum number of segments.  Passes are made
    over the tree, inserting a node at fraction .5 of every branch that
    exceeds the maximum, until a pass leaves the node count unchanged.
    @param fs: an object providing .tree (parsed as a newick tree) and .segments
    @return: the newick string of the modified tree followed by a newline
    @raise HandlingError: if the root has a branch length
    or if a non-root node lacks one
    """
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # no branch is allowed to remain longer than this threshold
    segment_count = fs.segments
    threshold = tree.get_total_length() / float(segment_count)
    stable = False
    while not stable:
        # iterate over a snapshot because nodes are inserted during the pass
        snapshot = list(tree.preorder())
        for node in snapshot:
            if node is tree.root:
                if node.blen is not None:
                    raise HandlingError(
                        'the root node should not have a branch length')
                continue
            if node.blen is None:
                raise HandlingError(
                    'each non-root node should have a branch length')
            if node.blen > threshold:
                # the inserted node takes the name of the node below it
                inserted = Newick.NewickNode()
                inserted.name = node.name
                tree.insert_node(inserted, node.parent, node, .5)
        # the tree is stable when the pass did not change the node count
        stable = len(snapshot) == len(list(tree.preorder()))
    return tree.get_newick_string() + '\n'
Exemplo n.º 3
0
def get_response_content(fs):
    """
    Reroot a tree at a point on the branch between two adjacent nodes.

    A fraction of 0 selects the parent node, a fraction of 1 selects
    the child node, and any other value inserts a new node on the branch
    to serve as the root.  The nodes may be given in either order.
    @param fs: an object providing .tree (parsed as a newick tree),
    .parent and .child (looked up with get_unique_node), and .fraction
    @return: the newick string of the rerooted tree followed by a newline
    @raise HandlingError: if a node lookup fails, if the nodes are not
    adjacent, or if the selected node cannot become the new root
    """
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    fraction = fs.fraction
    # look up the node objects
    try:
        parent = tree.get_unique_node(fs.parent)
        child = tree.get_unique_node(fs.child)
    except Newick.NewickSearchError as e:
        raise HandlingError(e)
    # orient the pair, accepting the nodes in the reverse order
    if parent is not child.parent:
        if child is not parent.parent:
            raise HandlingError(
                'the given parent and child nodes are not adjacent')
        parent, child = child, parent
        fraction = 1 - fraction
    # pick the node that will become the root, creating it if necessary
    if fraction == 0:
        target = parent
    elif fraction == 1:
        target = child
    else:
        target = Newick.NewickNode()
        tree.insert_node(target, parent, child, fraction)
    # reject targets that cannot be rerooted to
    if target is tree.root:
        raise HandlingError('the new root is the same as the old root')
    if not target.parent:
        raise HandlingError('topology error')
    # reroot, then drop the previous root if it is left with a single child
    previous_root = tree.root
    tree.reroot(target)
    if len(previous_root.children) == 1:
        tree.remove_node(previous_root)
    return tree.get_newick_string() + '\n'
Exemplo n.º 4
0
 def _build_tree(self, indices, depth):
     """
     Recursively build a subtree by splitting taxa on loading signs.

     At each level the entry of self.sorted_eigensystem selected by
     the depth supplies a vector; the taxa are separated according to
     the sign of their value in that vector and each nonempty side
     becomes a child with branch length 1.
     @param indices: a set of indices of taxa in the current subtree
     @param depth: the depth of the current subtree
     @return: the node representing the subtree
     @raise ValueError: if the set of indices is empty
     @raise IncompleteError: if the depth reaches the end of the eigensystem
     @raise NegligibleError: if a value has magnitude below self.epsilon
     """
     root = Newick.NewickNode()
     if not indices:
         raise ValueError(
             'trying to build a tree from an empty set of indices')
     if len(indices) == 1:
         # a single taxon is a named leaf
         index = list(indices)[0]
         root.set_name(self.ordered_names[index])
         return root
     if depth >= len(self.sorted_eigensystem):
         # the ordered eigenvector loading signs
         # were unable to distinguish each taxon
         raise IncompleteError()
     _w, loadings = self.sorted_eigensystem[depth]
     # sort every index into exactly one of three buckets
     negligible, negative, positive = set(), set(), set()
     for i in indices:
         loading = loadings[i]
         if abs(loading) < self.epsilon:
             bucket = negligible
         elif loading < 0:
             bucket = negative
         else:
             bucket = positive
         bucket.add(i)
     if negligible:
         # eigenvector loadings near zero are degenerate
         raise NegligibleError()
     # the negative side is attached before the positive side
     for side in (negative, positive):
         if not side:
             continue
         child = self._build_tree(side, depth + 1)
         child.set_branch_length(1)
         root.add_child(child)
         child.set_parent(root)
     return root
Exemplo n.º 5
0
def get_response_content(fs):
    """
    Insert a node at fraction .5 of every branch of a tree.

    Each inserted node takes the name of the node below it.
    @param fs: an object providing .tree (parsed as a newick tree)
    @return: the newick string of the modified tree followed by a newline
    @raise HandlingError: if the root has a branch length
    or if a non-root node lacks one
    """
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # iterate over a snapshot because nodes are inserted along the way
    for node in list(tree.preorder()):
        if node is tree.root:
            if node.blen is not None:
                raise HandlingError(
                    'the root node should not have a branch length')
            continue
        if node.blen is None:
            raise HandlingError(
                'each non-root node should have a branch length')
        # split the branch above this node with a node of the same name
        inserted = Newick.NewickNode()
        inserted.name = node.name
        tree.insert_node(inserted, node.parent, node, .5)
    return tree.get_newick_string() + '\n'
Exemplo n.º 6
0
def contrast_matrix_to_tree(C, ordered_names):
    """
    Reconstruct a newick tree from a contrast matrix.

    The rows of the transpose of C are taken as the contrasts.
    The single contrast without any zero entry is used at the root,
    and the remaining contrasts are handed to the recursive reconstruction.
    @param C: contrast matrix as a numpy array
    @param ordered_names: leaf names corresponding to rows of C
    @return: a newick tree object
    """
    # separate the contrasts that have a zero entry from those that do not
    full_contrasts = []
    partial_contrasts = []
    for contrast in C.T.tolist():
        if 0 in contrast:
            partial_contrasts.append(contrast)
        else:
            full_contrasts.append(contrast)
    # exactly one contrast should not have any zero element
    assert len(full_contrasts) == 1
    root_contrast = full_contrasts[0]
    # build the tree below a new root node;
    # None is passed because the variance partition is not defined at the root
    root_node = Newick.NewickNode()
    ReconstructionInfo(root_node).build_subtree(
        root_contrast, partial_contrasts, None, ordered_names)
    # wrap the root node in a newick tree
    return Newick.NewickTree(root_node)
Exemplo n.º 7
0
def get_response_content(fs):
    """
    Validate two subtrees and their taxon groups, then report three methods.

    The output of do_first_method, do_second_method and do_third_method
    is written to a text buffer in that order, followed by the matrix
    returned by Clustering.get_R_balaji and its sums within blocks.
    NOTE: this function uses the Python 2 'print >> file' statement form.
    @param fs: an object providing .subtree_a and .subtree_b (parsed as
    newick trees), .taxa_a1, .taxa_a2, .taxa_b1, .taxa_b2 (text that is split
    into stripped lines) and .blen (the length of the connecting branch)
    @return: the accumulated text of the response
    @raise HandlingError: if a taxon list has duplicates, if a subtree has
    fewer than two tips or duplicate tip names, or if the two taxon lists of
    a subtree do not partition its tips
    """
    # read the values from the form
    subtree_a = NewickIO.parse(fs.subtree_a, Newick.NewickTree)
    taxa_a1 = Util.get_stripped_lines(StringIO(fs.taxa_a1))
    taxa_a2 = Util.get_stripped_lines(StringIO(fs.taxa_a2))
    subtree_b = NewickIO.parse(fs.subtree_b, Newick.NewickTree)
    taxa_b1 = Util.get_stripped_lines(StringIO(fs.taxa_b1))
    taxa_b2 = Util.get_stripped_lines(StringIO(fs.taxa_b2))
    connecting_branch_length = fs.blen
    # assert that no group of taxa contains duplicates
    for taxa in (taxa_a1, taxa_a2, taxa_b1, taxa_b2):
        if len(set(taxa)) != len(taxa):
            raise HandlingError('one of the lists of taxa contains duplicates')
    # assert that each subtree has at least two tips and no duplicates
    for tree in (subtree_a, subtree_b):
        tip_names = list(node.get_name() for node in tree.gen_tips())
        if len(tip_names) < 2:
            raise HandlingError('each subtree should have at least two tips')
        if len(set(tip_names)) != len(tip_names):
            raise HandlingError('a subtree has duplicate tip names')
    # assert that the partitions are valid
    first_group = ('A', subtree_a, taxa_a1, taxa_a2) 
    second_group = ('B', subtree_b, taxa_b1, taxa_b2)
    for tree_name, tree, taxa_1, taxa_2 in (first_group, second_group):
        tip_names = set(node.get_name() for node in tree.gen_tips())
        # every listed taxon must be the name of a tip of the subtree
        for group_name, taxa in (('1', taxa_1), ('2', taxa_2)):
            nonsense_names = list(set(taxa) - set(tip_names))
            msg_a = 'the following taxa in group %s ' % group_name
            msg_b = 'of subtree %s ' % tree_name
            msg_c = 'are not valid tips: %s' % str(nonsense_names)
            message = msg_a + msg_b + msg_c
            if nonsense_names:
                raise HandlingError(message)
        # the two groups of a subtree must not share a taxon
        if set(taxa_1) & set(taxa_2):
            msg_a = 'the taxon lists for subtree %s ' % tree_name
            msg_b = 'are not disjoint'
            raise HandlingError(msg_a + msg_b)
        # '|' binds tighter than '<', so this checks whether the union of
        # the two groups is a proper subset of the tip names
        if set(taxa_1) | set(taxa_2) < tip_names:
            msg_a = 'a tip in subtree %s ' % tree_name
            msg_b = 'is not represented in either of the groups'
            raise HandlingError(msg_a + msg_b)
    # define the response
    out = StringIO()
    # get the results for the first method
    # (this is called before the subtrees are connected below)
    do_first_method(subtree_a, subtree_b, taxa_a1, taxa_a2,
            taxa_b1, taxa_b2, connecting_branch_length, out)
    # define the entire tree by connecting the subtrees;
    # this modifies both subtree objects in place:
    # the root of subtree_b becomes a child of the root of subtree_a
    subtree_b.get_root().set_branch_length(connecting_branch_length)
    subtree_a.get_root().add_child(subtree_b.get_root())
    tree = subtree_a
    # define the order and structure of the distance matrix
    block_structure = get_block_structure(taxa_a1, taxa_a2, taxa_b1, taxa_b2)
    name_order = taxa_a1 + taxa_a2 + taxa_b1 + taxa_b2
    # get the distance matrix;
    # the connected tree is converted to a FelTree by a round trip
    # through its newick string
    fel_tree = NewickIO.parse(NewickIO.get_newick_string(tree),
            FelTree.NewickTree)
    D = fel_tree.get_distance_matrix(name_order)
    # get the R matrix
    R = Clustering.get_R_balaji(D)
    # get the sums of block elements of R;
    # block_structure is indexed like the rows of R and each of its entries
    # is used as an index into the 4x4 matrix of sums
    block_R = [[0]*4 for i in range(4)]
    for i, block_i in enumerate(block_structure):
        for j, block_j in enumerate(block_structure):
            block_R[block_i][block_j] += R[i][j]
    # show the results from the second method
    do_second_method(fel_tree, taxa_a1, taxa_a2, taxa_b1, taxa_b2, out)
    # show the results from the third method;
    # fresh copies of the subtrees are parsed from the form
    # (the subtree objects parsed earlier were modified above)
    tree_m3_a = NewickIO.parse(fs.subtree_a, Newick.NewickTree)
    tree_m3_b = NewickIO.parse(fs.subtree_b, Newick.NewickTree)
    # give each subtree root an extra child named 'special'
    # whose branch length is half of the connecting branch length
    for t in (tree_m3_a, tree_m3_b):
        neo = Newick.NewickNode()
        neo.name = 'special'
        neo.blen = connecting_branch_length / 2
        t.get_root().add_child(neo)
    feltree_m3_a = NewickIO.parse(NewickIO.get_newick_string(tree_m3_a),
            FelTree.NewickTree)
    feltree_m3_b = NewickIO.parse(NewickIO.get_newick_string(tree_m3_b),
            FelTree.NewickTree)
    # parse the subtrees once more and join them under a new root,
    # with each subtree root getting half of the connecting branch length
    tree_m3_a = NewickIO.parse(fs.subtree_a, Newick.NewickTree)
    tree_m3_b = NewickIO.parse(fs.subtree_b, Newick.NewickTree)
    new_root = Newick.NewickNode()
    tree_m3_a.get_root().blen = connecting_branch_length / 2
    tree_m3_b.get_root().blen = connecting_branch_length / 2
    new_root.add_child(tree_m3_a.get_root())
    new_root.add_child(tree_m3_b.get_root())
    tree_m3 = Newick.NewickTree(new_root)
    feltree_m3 = NewickIO.parse(NewickIO.get_newick_string(tree_m3),
            FelTree.NewickTree)
    branch_d2 = connecting_branch_length / 2
    do_third_method(feltree_m3_a, feltree_m3_b, feltree_m3,
            branch_d2, taxa_a1, taxa_a2, taxa_b1, taxa_b2, out)
    # show the expected results
    print >> out, 'M:'
    print >> out, MatrixUtil.m_to_string(R)
    print >> out, 'M summed within blocks:'
    print >> out, MatrixUtil.m_to_string(block_R)
    # return the response
    return out.getvalue()
Exemplo n.º 8
0
def make_tree(D, ordered_states, iteration_callback=None):
    """
    Build a tree from a distance matrix by neighbor joining.

    Each iteration joins the pair of indices chosen by do_iteration under
    a new node and replaces the pair in the distance matrix by that node.
    When only three indices remain they are all attached to a final root.
    @param D: a row major distance matrix
    @param ordered_states: state names ordered according to the distance matrix
    @param iteration_callback: if given, it is called on each iteration
    with the Q matrix and the result of do_iteration
    @return: the tree, parsed as a FelTree.NewickTree from its newick string
    @raise ValueError: if fewer than three states are given
    """
    if len(ordered_states) < 3:
        raise ValueError(
            'the neighbor joining algorithm needs at least three nodes')
    # finished subtrees waiting to be joined, keyed by root serial number
    forest = {}
    # the serial number of the node behind each index of the current matrix
    index_to_serial = range(len(ordered_states))
    next_serial = len(ordered_states)
    while True:
        # get the new vector of distances and the neighbor index pair
        result = do_iteration(D)
        if iteration_callback:
            # the Q matrix is computed only for reporting
            Q = get_Q_matrix(D)
            iteration_callback(Q, result)
        v, (f, g) = result
        # the node that joins the selected indices
        root = Newick.NewickNode()
        root.serial_number = next_serial
        # in the final iteration all three remaining indices are joined
        is_final = len(index_to_serial) == 3
        branch_indices = range(3) if is_final else (f, g)
        for index in branch_indices:
            serial = index_to_serial[index]
            # reuse the subtree built for this serial number if there is one
            neo = forest.pop(serial, None)
            if not neo:
                neo = Newick.NewickNode()
                neo.serial_number = serial
            root.add_child(neo)
            neo.set_parent(root)
            neo.blen = v[index]
        if is_final:
            # name the tips and return the tree as a FelTree
            tree = Newick.NewickTree(root)
            for tip in tree.gen_tips():
                tip.name = ordered_states[tip.serial_number]
            return NewickIO.parse(tree.get_newick_string(), FelTree.NewickTree)
        # keep the new subtree until it is joined itself
        forest[next_serial] = root
        # drop the joined rows and columns from the distance matrix
        # and append the new node as the last row and column
        joined = (f, g)
        reduced = [
            [x for col, x in enumerate(row) if col not in joined] + [v[r]]
            for r, row in enumerate(D) if r not in joined
        ]
        reduced.append(
            [x for col, x in enumerate(v) if col not in joined] + [0])
        D = reduced
        # update the serial number map in the same way
        index_to_serial = [
            s for k, s in enumerate(index_to_serial) if k not in joined
        ] + [next_serial]
        next_serial += 1
Exemplo n.º 9
0
 def build_subtree(self, current_contrast, subtree_contrasts, neg_weight,
                   ordered_names):
     """
     Recursively build the two subtrees below the node of this object.

     One child is created for the entries of the current contrast
     accepted by is_negative and one for the entries accepted by is_positive.
     The branch lengths of the two children and the variance
     of this object are set once both children have been built.
     @param current_contrast: the contrast of the current node
     @param subtree_contrasts: a set of contrasts defining the subtree
     @param neg_weight: a way of partitioning the variance or None if root
     @param ordered_names: a list of names conformant with the contrast vector
     @raise ContrastError: if a side matches more than one contrast,
     or matches no contrast without being a single leaf
     """
     # build the negative subtree first and then the positive subtree
     for sign_test in (is_negative, is_positive):
         side = frozenset(
             k for k, x in enumerate(current_contrast) if sign_test(x))
         # pick out the contrast whose support is exactly this side
         # and the contrasts whose support is strictly inside it
         exact = []
         nested = []
         for contrast in subtree_contrasts:
             support = frozenset(k for k, x in enumerate(contrast) if x)
             if support == side:
                 exact.append(contrast)
             elif support < side:
                 nested.append(contrast)
         exact_count = len(exact)
         if exact_count > 1 or (exact_count == 0 and len(side) != 1):
             raise ContrastError()
         # attach a new child node to the current node
         child_node = Newick.NewickNode()
         child_node.parent = self.node
         self.node.children.append(child_node)
         child_info = ReconstructionInfo(child_node)
         if exact:
             # the child is an internal node with its own contrast
             child_contrast = exact[0]
             child_info.build_subtree(
                 child_contrast, nested,
                 _get_neg_weight(current_contrast, child_contrast),
                 ordered_names)
         else:
             # the child is a leaf, so the side holds exactly one index
             index, = side
             child_node.name = ordered_names[index]
             child_info.variance = 0
         self.child_info_pair.append(child_info)
     # partition the variance of the contrast between the child branches
     a0 = self.child_info_pair[0].variance
     b0 = self.child_info_pair[1].variance
     total_variance = _get_contrast_variance(current_contrast)
     if neg_weight is None:
         # at the root the remaining variance is split evenly
         a = b = (total_variance - a0 - b0) / 2
     else:
         a, b = _get_branch_lengths(
             total_variance, (1 - neg_weight), a0, b0)
     # set the branch lengths of the children
     first_child = self.node.children[0]
     first_child.blen = a
     second_child = self.node.children[1]
     second_child.blen = b
     # set the variance of the current node
     alpha = a + a0
     beta = b + b0
     self.variance = (alpha * beta) / (alpha + beta)
Exemplo n.º 10
0
 def _make_tree_helper(self, D, index_to_serial, depth=0):
     """
     Recursively build a newick tree from a distance matrix.
     The indices of the matrix are split into a selection and its complement,
     a tree is built recursively for each side, and the two trees are merged.
     If the splitter leaves fewer than two indices on a side,
     the fallback method named by self.fallback_name is used instead.
     NOTE: the 'halving' fallback modifies D in place.
     NOTE: this method uses the Python 2 'print >> file' statement form.
     @param D: a row major distance matrix
     @param index_to_serial: converts an index in D to a serial number for the tree node
     @param depth: gives the recursion depth; this is for instrumentation
     @return: a newick tree with branch lengths
     @raise NeighborhoodJoiningError: if the 'halving' fallback cannot make progress
     """
     # instrumentation to notify the framework that a recursive call has been made
     if self.callback:
         self.callback(depth)
     # recursively build the newick tree
     n = len(D)
     if n == 3:
         # if there are only three nodes then return a single star tree;
         # only the vector of distances from the iteration is used here
         v, (f, g) = NeighborJoining.do_iteration(D)
         root = Newick.NewickNode()
         for i, d in enumerate(v):
             neo = Newick.NewickNode()
             neo.serial_number = index_to_serial[i]
             neo.blen = d
             root.add_child(neo)
             neo.set_parent(root)
         return Newick.NewickTree(root)
     # try to get the selection using a custom splitter
     selection = self.splitter.get_selection(D)
     complement = set(range(n)) - selection
     # if the split was insufficient then resort to either modifying the distance matrix or using neighbor joining
     # (if fallback_name is neither 'nj' nor 'halving' the insufficient split
     # is kept and the assertion in the reporting section below fails)
     fallback = False
     if min(len(selection), len(complement)) < 2:
         fallback = True
         if self.fallback_name == 'nj':
             # use an iteration of neighbor joining if this is the preferred fallback method
             v, (f, g) = NeighborJoining.do_iteration(D)
             selection = set((f, g))
             complement = set(range(n)) - selection
         elif self.fallback_name == 'halving':
             # repeatedly modify the distance matrix if this is the preferred fallback method
             halving_count = 0
             while min(len(selection), len(complement)) < 2:
                 # kill the loop if the halving count is ridiculous;
                 # leaf_stem_length used in the message below
                 # was assigned by an earlier iteration of this loop
                 if halving_count > 1000:
                     error_out = StringIO()
                     print >> error_out, 'the number of leaf stem halving iterations is ridiculous (%d);' % halving_count
                     print >> error_out, 'the singleton leaf stem length is %s;' % leaf_stem_length
                     print >> error_out, 'the distance matrix is:'
                     print >> error_out, MatrixUtil.m_to_string(D)
                     raise NeighborhoodJoiningError(
                         error_out.getvalue().strip())
                 # find the index of the leaf singleton
                 halving_count += 1
                 if len(selection) == 1:
                     smaller = selection
                     larger = complement
                 elif len(complement) == 1:
                     smaller = complement
                     larger = selection
                 else:
                     # neither side is a singleton, so one side is empty
                     error_out = StringIO()
                     print >> error_out, 'in the following distance matrix,'
                     print >> error_out, 'a split was so degenerate that it did not even leave a leaf stem to work with:'
                     print >> error_out, MatrixUtil.m_to_string(D)
                     raise NeighborhoodJoiningError(
                         error_out.getvalue().strip())
                 v = get_crossing_distances(D, selection, complement)
                 # get the distance from the leaf singleton to the root of the rest of the tree
                 leaf_singleton_index = list(smaller)[0]
                 leaf_stem_length = v[leaf_singleton_index]
                 # if the leaf stem length is zero then repeatedly halving it will not help.
                 if not leaf_stem_length:
                     error_out = StringIO()
                     print >> error_out, 'the singleton leaf stem length is zero;'
                     print >> error_out, 'the number of leaf stem halving iterations performed was %d;' % halving_count
                     print >> error_out, 'the distance matrix is:'
                     print >> error_out, MatrixUtil.m_to_string(D)
                     raise NeighborhoodJoiningError(
                         error_out.getvalue().strip())
                 # modify the distance matrix in place:
                 # each distance between the singleton and an index on the
                 # other side is reduced by half of the leaf stem length
                 for i in larger:
                     D[i][leaf_singleton_index] -= leaf_stem_length / 2
                     D[leaf_singleton_index][i] -= leaf_stem_length / 2
                 # get the selection and complement using the modified distance matrix
                 selection = self.splitter.get_selection(D)
                 complement = set(range(n)) - selection
     # define the new serial numbers for the selection and complement subtrees
     selection_serial = self.number_generator.get_next()
     complement_serial = self.number_generator.get_next()
     # for reporting purposes only,
     # store the subset of leaf serials defined by each new serial number
     for new_serial, indices in ((selection_serial, selection),
                                 (complement_serial, complement)):
         serials = set(index_to_serial[i] for i in indices)
         new_set = set()
         for serial in serials:
             new_set.update(self.serial_number_to_tip_set[serial])
         self.serial_number_to_tip_set[new_serial] = new_set
     # report the split using the labels of the tips on the selection side
     flattened_selection = set(
         self.ordered_labels[serial]
         for serial in self.serial_number_to_tip_set[selection_serial])
     if fallback:
         if self.fallback_name == 'nj':
             self.on_nj_fallback_split(flattened_selection, len(selection),
                                       len(complement))
         elif self.fallback_name == 'halving':
             self.on_halving_fallback_split(flattened_selection,
                                            len(selection), len(complement),
                                            halving_count)
         else:
             assert False, 'internal error: invalid fallback method'
     else:
         self.on_custom_split(flattened_selection, len(selection),
                              len(complement))
     # break the distance matrix into two distance matrices,
     # then make a tree for each one.
     # NOTE(review): the serial number lists below assume that
     # split_distance_matrix orders the indices of each side in sorted order
     # and puts the other side last -- confirm against split_distance_matrix
     A = list(sorted(selection))
     B = list(sorted(complement))
     A_distance_matrix, B_distance_matrix = split_distance_matrix(
         D, selection, complement)
     # define the serial numbers for the split distance matrices;
     # each list ends with the serial number of the opposite side
     A_index_to_serial = [index_to_serial[i]
                          for i in A] + [complement_serial]
     B_index_to_serial = [index_to_serial[i]
                          for i in B] + [selection_serial]
     # make the next trees
     A_tree = self._make_tree_helper(A_distance_matrix, A_index_to_serial,
                                     depth + 1)
     B_tree = self._make_tree_helper(B_distance_matrix, B_index_to_serial,
                                     depth + 1)
     # return the merged tree
     return merge_trees(A_tree, B_tree)