def write_tree( child_lists, name_map, rank_map, options, branch_length=1 ):
    """Build a Biopython tree from parent/child id lists and write it as Newick.

    Parameters:
        child_lists: mapping of node id -> list of child node ids. The first
            entry of ``child_lists["0"]`` is taken as the root node id.
        name_map: mapping of node id -> clade name; used unless
            ``options.name_id`` is truthy, in which case the id is the name.
        rank_map: mapping of node id -> rank value comparable with
            ``options.cluster``.
        options: object exposing ``name_id``, ``cluster`` and ``output_tree``.
        branch_length: branch length given to every clade.

    Returns:
        None. The tree is written to ``options.output_tree``.
    """
    # Uses Biopython, only load if making tree
    import Bio.Phylo
    from Bio.Phylo import BaseTree

    def _get_name( node_id ):
        if options.name_id:
            return node_id
        return name_map[node_id]

    nodes = {}

    def _get_clade( node_id ):
        # Create each clade once so parent and child lookups share the object.
        if node_id not in nodes:
            nodes[node_id] = BaseTree.Clade( name=_get_name( node_id ), branch_length=branch_length )
        return nodes[node_id]

    root_node_id = child_lists["0"][0]
    _get_clade( root_node_id )

    def recurse_children( parent_id ):
        if options.cluster is not None and rank_map[parent_id] == options.cluster:
            # Short circuit if we found our rank, prevents 'hanging' no ranks from being output
            # e.g. clustering by "species" (Escherichia coli), but have "no rank" below (Escherichia coli K-12) in test_db
            return
        parent = _get_clade( parent_id )
        for child_id in child_lists.get( parent_id, [] ):
            if options.cluster is None or ( rank_map[child_id] <= options.cluster ):
                # Link the child into the tree; without this only the root is written.
                parent.clades.append( _get_clade( child_id ) )
                recurse_children( child_id )

    recurse_children( root_node_id )
    tree = BaseTree.Tree(root=nodes[root_node_id])
    Bio.Phylo.write( [tree], options.output_tree, 'newick' )
# Example #2
def prettyprint_tree(tree, file=None):
    # Draw `tree` as ASCII art via Phylo.draw_ascii; `file` is forwarded to
    # draw_ascii unchanged.
    # Convert the "tree" object (list of clades) to a BioPython tree
    # to take advantage of their output methods
    def create_ntree(tree):
        # Turn the dictionary representation built below into a BaseTree.Clade.
        # NOTE(review): the body of the `if` below is missing from this copy of
        # the source, so nothing is attached to `ntree` and the block does not
        # parse; restore the branch (and any `else`) from the original.
        ntree = BaseTree.Clade()
        for key in tree:
            el = tree[key]
            if len(el.values()) > 0:
        return ntree

    # Sort the clades from largest to smallest
    new_tree = sorted(tree, key=lambda x: -len(x))
    # Build a dictionary representation of the tree
    # (create_tree_dict is defined outside this excerpt)
    tree_dict = {}
    for clade in new_tree:
        tree_dict = create_tree_dict(tree_dict, clade)
    # Convert the dictionary representation to a BioPython Tree object
    ntree = BaseTree.Tree(create_ntree(tree_dict))
    # Use the BioPython print method
    Phylo.draw_ascii(ntree, file=file)
# Example #3
def create_upgma_tree(matrix, is_distance=True):
    # Build an unrooted BaseTree.Tree from `matrix`; the root clade also gets
    # the matrix attached as `root.matrix`.
    # `is_distance` selects min() (True) or, presumably, max() (False) when
    # picking the next pair — TODO confirm, the `else:` line is missing below.
    adj_map = matrix.create_adjacency_map()
    closest_pairs = create_closest_pairs(adj_map, is_distance=is_distance)
    clade_map = create_clade_map(adj_map)

    # NOTE(review): this loop is truncated in this copy of the source: the
    # `else:` between the min()/max() lines and the start of the call whose
    # arguments appear on the last line are missing.
    for i in range(matrix.size - 2):
        if is_distance:
            source, pair_edge = min(closest_pairs.items(), key=lambda x: x[1])
            source, pair_edge = max(closest_pairs.items(), key=lambda x: x[1])
                           closest_pairs, (source, pair_edge[0]),

    # Whatever is left in clade_map after the merges becomes the root.
    unmerged_clusters = list(clade_map.keys())
    unmerged_clades = list(clade_map.values())
    if len(unmerged_clusters) > 1:
        branch_length = adj_map[unmerged_clusters[0]][unmerged_clusters[1]]

        # NOTE(review): the Clade(...) call below is cut off, and the `else:`
        # that should precede the single-clade assignment is missing.
        root = BaseTree.Clade(branch_length=branch_length,
        root = unmerged_clades[0]

    root.matrix = matrix
    tree = BaseTree.Tree(root=root, rooted=False)

    return tree
# Example #4
    def upgma(self, distance_matrix):
        """Construct and return an UPGMA tree.

        Constructs and returns an Unweighted Pair Group Method
        with Arithmetic mean (UPGMA) tree.

        :Parameters:
            distance_matrix : DistanceMatrix
                The distance matrix for tree construction.

        :Returns: a ``BaseTree.Tree`` rooted at the last inner clade created.

        :Raises: TypeError if ``distance_matrix`` is not a ``DistanceMatrix``.
        """
        if not isinstance(distance_matrix, DistanceMatrix):
            raise TypeError("Must provide a DistanceMatrix object.")

        # make a copy of the distance matrix to be used
        dm = copy.deepcopy(distance_matrix)
        # init terminal clades
        clades = [BaseTree.Clade(None, name) for name in dm.names]
        # init minimum index
        min_i = 0
        min_j = 0
        inner_count = 0
        while len(dm) > 1:
            min_dist = dm[1, 0]
            # find minimum index
            for i in range(1, len(dm)):
                for j in range(0, i):
                    if min_dist >= dm[i, j]:
                        min_dist = dm[i, j]
                        min_i = i
                        min_j = j

            # create clade and attach the two clusters being joined
            clade1 = clades[min_i]
            clade2 = clades[min_j]
            inner_count += 1
            inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))
            inner_clade.clades.append(clade1)
            inner_clade.clades.append(clade2)
            # assign branch length: a leaf sits half the distance below the new
            # node; an inner clade additionally subtracts its own height.
            # _height_of must not be called on a leaf whose branch length is
            # still unset.
            for clade in (clade1, clade2):
                if clade.is_terminal():
                    clade.branch_length = min_dist * 1.0 / 2
                else:
                    clade.branch_length = min_dist * 1.0 / 2 - self._height_of(clade)

            # update node list
            clades[min_j] = inner_clade
            del clades[min_i]

            # rebuild distance matrix,
            # set the distances of new node at the index of min_j
            for k in range(0, len(dm)):
                if k != min_i and k != min_j:
                    dm[min_j, k] = (dm[min_i, k] + dm[min_j, k]) * 1.0 / 2

            dm.names[min_j] = "Inner" + str(inner_count)

            del dm[min_i]
        inner_clade.branch_length = 0
        return BaseTree.Tree(inner_clade)
# Example #5
    def fit(self):
        # Build self.tree from self.dist_matrix by repeatedly joining a pair of
        # nodes until two remain. Returns False when no distance matrix is set,
        # True otherwise.
        # NOTE(review): several lines of this method are truncated in this copy
        # of the source (unclosed `{`, `[` and call parentheses below); it does
        # not parse as-is.

        if self.dist_matrix is None:
            return (False)

        # The matrix must be square with an all-zero (falsy) diagonal.
        assert (self.dist_matrix.shape[0] == self.dist_matrix.shape[1])
        assert (not any(
            [self.dist_matrix[i][i] for i in self.dist_matrix.index]))

        self.tree = None

        # One terminal clade per column label.
        # NOTE(review): the closing `}` of this dict comprehension is missing.
        self._nodes = {
            n: BaseTree.Clade(None, str(n))
            for n in self.dist_matrix.columns
        self._d_matrix = self.dist_matrix

        while self._d_matrix.shape[0] > 2:


            raw_min, col_min = self._get_pos_min_from_q_matrix()
            range_raw, range_col = self._get_dist_for_neighborhood(
                raw_min, col_min)
            # The joined node is named by concatenating the two labels.
            new_name = "{}{}".format(str(raw_min), str(col_min))

            # NOTE(review): the remaining arguments of this call are missing.
            self._update_nodes(raw_min, col_min, range_raw, range_col,

            new_dist_matrix = self._get_new_dist_matrix(raw_min, col_min)
            # NOTE(review): the closing `]` of this list and the rest of the
            # pd.Series(...) call are missing.
            dists_node = [
                self._get_dist_for_nodes(raw_min, col_min, index)
                for index in new_dist_matrix.index
            new_item = pd.Series(dists_node,
            # Append the new node as a row, transpose, then append it again
            # (with a 0 self-distance) so it is present on both axes.
            # NOTE(review): assuming `pd` is pandas, DataFrame.append and
            # Series.append were removed in pandas 2.0 — confirm the target
            # version.
            new_dist_matrix = new_dist_matrix.append(new_item).T
            new_item = new_item.append(
                pd.Series(0, name=new_name, index=[new_name]))
            new_dist_matrix = new_dist_matrix.append(new_item)
            self._d_matrix = new_dist_matrix

        # Two nodes remain: the second becomes the tree root.
        # NOTE(review): node1 is given a branch length but is not attached to
        # node2 in the visible code — a line may be missing here.
        assert (len(self._nodes) == 2)
        name1 = self._d_matrix.index[0]
        name2 = self._d_matrix.index[1]
        node1 = self._nodes.pop(name1)
        node2 = self._nodes.pop(name2)
        node1.branch_length = self._d_matrix[name1][name2]
        self.tree = BaseTree.Tree(node2, rooted=False)

        # Drop the working state.
        self._nodes = None
        self._q_matrix = None
        self._d_matrix = None

        return (True)
# Example #6
    def create_tree(self, root: ParsimonyClade):
        """
        Create tree with given root.

        root: root clade of the new tree

        returns: a rooted ``BaseTree.Tree`` wrapping ``root``
        """
        return BaseTree.Tree(root, rooted=True)
# Example #7
def adam_consensus(trees):
    """Search Adam Consensus tree from multiple trees.

    :Parameters:
        trees : list
            list of trees to produce consensus tree.

    :Returns: a rooted ``BaseTree.Tree`` whose root is ``_part`` applied to
        the root clades of ``trees``.
    """
    clades = [tree.root for tree in trees]
    return BaseTree.Tree(root=_part(clades), rooted=True)
# Example #8
def strict_consensus(trees, mcmc=False):
    """Search strict consensus tree from multiple trees.

    :Parameters:
        trees : list
            list of trees to produce consensus tree or a list of tuples
            output of mcmc if mcmc=True, tuples like (tree, number of occurrences in MCMC)

        mcmc : Boolean
            True if parameter trees is a tuple, output of mcmc

    :Returns: a ``BaseTree.Tree`` containing only the clades found in every
        input tree.

    :Raises: ValueError if no clade shared by all trees covers every terminal
        of the first tree.
    """
    if mcmc:
        trees = [tree[0] for tree in trees]
    trees_iter = iter(trees)
    first_tree = next(trees_iter)

    terms = first_tree.get_terminals()
    bitstr_counts, tree_count = _count_clades(itertools.chain([first_tree], trees_iter))

    # Store bitstrs for strict clades
    strict_bitstrs = [
        bitstr for bitstr, t in bitstr_counts.items() if t[0] == tree_count
    ]
    strict_bitstrs.sort(key=lambda bitstr: bitstr.count("1"), reverse=True)
    # Create root
    root = BaseTree.Clade()
    if strict_bitstrs[0].count("1") == len(terms):
        root.clades.extend(terms)
    else:
        raise ValueError("Taxons in provided trees should be consistent")
    # make a bitstr to clades dict and store root clade
    bitstr_clades = {strict_bitstrs[0]: root}
    # create inner clades
    for bitstr in strict_bitstrs[1:]:
        clade_terms = [terms[i] for i in bitstr.index_one()]
        clade = BaseTree.Clade()
        clade.clades.extend(clade_terms)
        # Iterate over a snapshot: the dict is modified once the parent is found.
        for bs, c in list(bitstr_clades.items()):
            # check if it should be the parent of current clade
            if bs.contains(bitstr):
                # remove old bitstring
                del bitstr_clades[bs]
                # update clade childs
                new_childs = [child for child in c.clades if child not in clade_terms]
                c.clades = new_childs
                # set current clade as child of c
                c.clades.append(clade)
                # update bitstring
                bs = bs ^ bitstr
                # update clade
                bitstr_clades[bs] = c
                break
        # put new clade
        bitstr_clades[bitstr] = clade
    return BaseTree.Tree(root=root)
# Example #9
def adam_consensus(trees, mcmc=False):
    """Search Adam Consensus tree from multiple trees.

    :Parameters:
        trees : list
            list of trees to produce consensus tree or a list of tuples
            output of mcmc if mcmc=True, tuples like (tree, number of occurrences in MCMC)

        mcmc : Boolean
            True if parameter trees is a tuple, output of mcmc

    :Returns: a rooted ``BaseTree.Tree`` whose root is ``_part`` applied to
        the root clades of the input trees.
    """
    if mcmc:
        # Keep only the tree from each (tree, count) tuple.
        trees = [tree[0] for tree in trees]
    clades = [tree.root for tree in trees]
    return BaseTree.Tree(root=_part(clades), rooted=True)
def strict_consensus(trees):
    """Search strict consensus tree from multiple trees.

    :Parameters:
        trees: list
            list of trees to produce consensus tree.

    :Returns: a ``BaseTree.Tree`` containing only the clades found in every
        input tree.

    :Raises: ValueError if no clade shared by all trees covers every terminal
        of the first tree.
    """
    terms = trees[0].get_terminals()
    bitstr_counts = _count_clades(trees)
    # Store bitstrs for strict clades
    strict_bitstrs = [bitstr for bitstr, t in bitstr_counts.items()
                      if t[0] == len(trees)]
    strict_bitstrs.sort(key=lambda bitstr: bitstr.count('1'), reverse=True)
    # Create root
    root = BaseTree.Clade()
    if strict_bitstrs[0].count('1') == len(terms):
        root.clades.extend(terms)
    else:
        raise ValueError('Taxons in provided trees should be consistent')
    # make a bitstr to clades dict and store root clade
    bitstr_clades = {strict_bitstrs[0]: root}
    # create inner clades
    for bitstr in strict_bitstrs[1:]:
        clade_terms = [terms[i] for i in bitstr.index_one()]
        clade = BaseTree.Clade()
        clade.clades.extend(clade_terms)
        # Iterate over a snapshot: the dict is modified once the parent is found.
        for bs, c in list(bitstr_clades.items()):
            # check if it should be the parent of current clade
            if bs.contains(bitstr):
                # remove old bitstring
                del bitstr_clades[bs]
                # update clade childs
                new_childs = [child for child in c.clades
                              if child not in clade_terms]
                c.clades = new_childs
                # set current clade as child of c
                c.clades.append(clade)
                # update bitstring
                bs = bs ^ bitstr
                # update clade
                bitstr_clades[bs] = c
                break
        # put new clade
        bitstr_clades[bitstr] = clade
    return BaseTree.Tree(root=root)
# Example #11
	def create_tree(self):
		"""Construct the UPGMA tree based on the distance matrix.

		The result is stored on ``self.tree`` and returned directly on later
		calls. Returns ``None`` (and sets ``self.tree = None``) when
		``self.distances`` is falsy.
		"""
		if hasattr(self, 'tree'):
			return self.tree

		if not self.distances:
			self.tree = None
			return None

		clades = [BaseTree.Clade(None, n) for n in self.distances.names]

		def find_clade(name):
			# Position in `clades` of the clade carrying this name.
			return [i for i, el in enumerate(clades) if el.name == name][0]

		while len(self.distances.names) > 1:
			dist, i, j = _find_min(self.distances)
			i_clade, j_clade = find_clade(i), find_clade(j)
			# NOTE(review): assumes _join_clades names the merged cluster
			# str(i) + str(j), so later find_clade lookups succeed — confirm.
			new_clade = BaseTree.Clade(0, str(i) + str(j))

			_recalc_height(clades[i_clade], dist)
			_recalc_height(clades[j_clade], dist)

			# Attach the two joined clusters to the new inner clade.
			new_clade.clades.append(clades[i_clade])
			new_clade.clades.append(clades[j_clade])

			# Delete the higher index first so the lower one stays valid.
			if j_clade > i_clade:
				i_clade, j_clade = j_clade, i_clade
			del clades[i_clade]
			del clades[j_clade]
			clades.append(new_clade)

			self.distances = _join_clades(i, j,  self.distances)

		self.tree = BaseTree.Tree(clades[0])

		return self.tree
# Example #12
    def full_gpu_upgma(self, distance_matrix):
        # UPGMA tree construction in which the search for the smallest matrix
        # entry is delegated to a CUDA kernel. Returns a BaseTree.Tree built
        # from the last inner clade.
        # NOTE(review): this copy of the source has lost several lines (kernel
        # braces and closing quotes, the kernel launch, `else:` branches, and
        # the calls that attach children to each inner clade); the notes below
        # mark each spot.

        # make a copy of the distance matrix to be used
        dm = copy.deepcopy(distance_matrix)

        # Second copy whose lower-triangle entries are all set to 1; it is
        # used further down as the per-pair weight when averaging distances.
        dm_count = copy.deepcopy(dm)
        for i in range(1, len(dm_count)):
            for j in range(0, i):
                dm_count[i, j] = 1

        # init terminal clades
        clades = [BaseTree.Clade(None, name) for name in dm.names]

        # init minimum index
        min_i = 0
        min_j = 0
        inner_count = 0

        # GPU kernel to find the minimum index and minimum distance
        # NOTE(review): the kernel text below is truncated (no braces, no write
        # to local_min, no closing triple quote / parenthesis).
        mod = SourceModule("""
         __global__ void findMin(double *dm, long long *index, double *local_min, int c, int l)
            int k = threadIdx.y + blockIdx.y*blockDim.y;  
            double min_dist = dm[k*c];  
            int id = 0 ;               
            for(int i= k*c ; i< (k+1)*c; i++){
                    if(min_dist >= dm[i])
                        min_dist = dm[i];
                        id = i;
            index[k]= id;


        while len(dm) > 1:
            # host array creation
            time_gpu_start = time.time()
            # Flatten rows 1..n-1 of the matrix, dropping each row's last
            # element, into one 1-D array.
            mat = dm.matrix
            dm_cpu = np.array(mat[1][:-1])
            for i in range(2, len(dm)):
                dm_cpu = np.append(dm_cpu, mat[i][:-1])

            # Number of entries in the flattened array.
            combinations = int(((len(dm) - 1) * len(dm)) / 2)

            # NOTE(review): an `else:` appears to be missing before the last
            # assignment; as written block_size always ends up 1024.
            if combinations < 1024 * 256:
                block_size = int(round((len(dm)) / 2))
            elif combinations < 1024 * 1024:
                block_size = 512
                block_size = 1024

            local_count = int(round(combinations / block_size))
            # NOTE(review): an `else:` appears to be missing here as well.
            if local_count < 1024:
                grid_size = 1
                grid_size = int(round(local_count / 1024)) + 1

            # Per-thread outputs: flat index of the local minimum and its value.
            index = np.zeros(block_size, dtype=int)
            min_val = np.zeros(block_size, dtype=float)

            local_min_array_gpu = drv.mem_alloc(dm_cpu.nbytes)
            local_index_gpu = drv.mem_alloc(index.nbytes)
            local_min_gpu = drv.mem_alloc(min_val.nbytes)

            drv.memcpy_htod(local_min_array_gpu, dm_cpu)
            drv.memcpy_htod(local_index_gpu, index)
            drv.memcpy_htod(local_min_gpu, min_val)
            func = mod.get_function("findMin")

            # start.record()
            # NOTE(review): the first line(s) of the func(...) kernel launch
            # are missing; only its block/grid keyword arguments remain.
                 block=(1, block_size, 1),
                 grid=(1, grid_size, 1))

            # end.record()
            # end.synchronize()
            drv.memcpy_dtoh(min_val, local_min_gpu)
            drv.memcpy_dtoh(index, local_index_gpu)

            min_val_new = min_val
            min_val = min_val.tolist()


            # Reduce the per-thread minima on the host; on ties the last
            # matching thread's index is kept.
            min_dist = min(min_val)
            global_id = 0
            for i in range(len(min_val_new)):
                if min_dist == min_val_new[i]:
                    global_id = index[i]

            # Convert the flat index back into (min_i, min_j) matrix
            # coordinates.
            # NOTE(review): an `else:` appears to be missing before the t_val
            # computation.
            for i in range(1, len(distance_matrix)):
                if global_id == 0:
                    min_i = 1
                    min_j = 0
                    t_val = ((i + 1) * (i + 2)) / 2
                    if global_id < t_val:
                        min_i = i + 1
                        min_j = global_id - (t_val - i - 1)
                    elif global_id == t_val:
                        min_i = i + 2
                        min_j = 0

            # create clade
            # NOTE(review): clade1/clade2 are never appended to
            # inner_clade.clades in the visible code.
            clade1 = clades[min_i]
            clade2 = clades[min_j]
            inner_count += 1
            inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))

            # assign branch length
            # NOTE(review): the `else:` lines of both branches below appear to
            # be missing, so the terminal value is immediately overwritten.
            if clade1.is_terminal():
                clade1.branch_length = min_dist * 1.0 / 2
                clade1.branch_length = min_dist * \
                                       1.0 / 2 - self._height_of(clade1)

            if clade2.is_terminal():
                clade2.branch_length = min_dist * 1.0 / 2
                clade2.branch_length = min_dist * \
                                       1.0 / 2 - self._height_of(clade2)

            # update node list
            clades[min_j] = inner_clade
            del clades[min_i]

            # rebuild distance matrix,
            # set the distances of new node at the index of min_j

            # Weighted average of the two merged rows, weights taken from
            # dm_count; the summed weight is stored back for the merged row.
            for k in range(0, len(dm)):
                r = 0
                if k != min_i and k != min_j:
                    r = dm_count[min_i, k] + dm_count[min_j, k]
                    dm[min_j, k] = ((dm[min_i, k] * dm_count[min_i, k]) +
                                    (dm[min_j, k] * dm_count[min_j, k])) / r
                    dm_count[min_j, k] = r

            dm_count.names[min_j] = "Inner" + str(inner_count)
            del dm_count[min_i]

            dm.names[min_j] = "Inner" + str(inner_count)

            del dm[min_i]
            inner_clade.branch_length = 0
            del dm_cpu
        return BaseTree.Tree(inner_clade)
def makeUpgma(score, otuName):
    """Build a UPGMA tree from a square distance matrix.

    Parameters:
        score: square list-of-lists distance matrix. It is rounded to six
            decimals and reduced IN PLACE as clusters are merged.
        otuName: list of taxon names in the row/column order of ``score``.
            Merged entries are overwritten IN PLACE with "Inner<k>" names.

    Returns:
        A ``BaseTree.Tree`` rooted at the last inner clade created.
    """
    for i in range(len(score)):
        for j in range(len(score)):
            score[i][j] = round(score[i][j], 6)

    clades = [BaseTree.Clade(None, name) for name in otuName]

    # init minimum index
    min_i = 0
    min_j = 0
    inner_count = 0
    while len(score) > 1:
        min_dist = score[1][0]
        # find minimum index
        for i in range(1, len(score)):
            for j in range(0, i):
                if min_dist >= score[i][j]:
                    min_dist = score[i][j]
                    min_i = i
                    min_j = j

        # create clade and attach the two clusters being merged
        clade1 = clades[min_i]
        clade2 = clades[min_j]
        inner_count += 1
        inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))
        inner_clade.clades.append(clade1)
        inner_clade.clades.append(clade2)

        # assign branch length: a leaf sits half the distance below the new
        # node; an inner clade additionally subtracts its own height.
        # height_of is the module-level helper standing in for the upstream
        # self._height_of.
        for clade in (clade1, clade2):
            if clade.is_terminal():
                clade.branch_length = min_dist * 1.0 / 2
            else:
                clade.branch_length = min_dist * \
                    1.0 / 2 - height_of(clade)

        # update node list
        clades[min_j] = inner_clade
        del clades[min_i]

        # rebuild distance matrix,
        # set the distances of new node at the index of min_j
        for k in range(0, len(score)):
            if k != min_i and k != min_j:
                score[min_j][k] = (score[min_i][k] + score[min_j][k]) * 1.0 / 2
                score[k][min_j] = score[min_j][k]

        otuName[min_j] = "Inner" + str(inner_count)
        # drop row and column min_i
        del score[min_i]
        for i in range(len(score)):
            del score[i][min_i]

    inner_clade.branch_length = 0
    return BaseTree.Tree(inner_clade)
# Example #14
# Branch lengths for clades created outside this excerpt. Each value is either
# a single number or the difference of two numbers.
# NOTE(review): the numbers look like node heights (parent minus child) —
# confirm against the data source.
clade_2500.branch_length = (3164 - 2500)
clade_owl_monkey.branch_length = 3164
clade_3164.branch_length = (4413 - 3164)
clade_lemur.branch_length = 4413
clade_4413.branch_length = (8758 - 4413)
clade_2356.branch_length = (8758 - 2356)
clade_rat.branch_length = 2356
clade_mouse.branch_length = 2356
clade_8758.branch_length = (17735 - 8758)
clade_7481.branch_length = (17735 - 7481)
clade_horse.branch_length = 7481
clade_muntjak.branch_length = 7481
clade_17735.branch_length = 18960 - 17735
clade_cat.branch_length = 18960

# Wrap `root` (defined outside this excerpt) in a rooted tree.
tree = BaseTree.Tree(root, rooted=True)



# Non-coding

# Clades created with no branch length and an empty name; presumably the
# internal nodes of a second ("non-coding") tree — TODO confirm.
clade_131584 = BaseTree.Clade(None, "")
clade_120079 = BaseTree.Clade(None, "")
clade_93169 = BaseTree.Clade(None, "")
clade_55571 = BaseTree.Clade(None, "")
clade_35195 = BaseTree.Clade(None, "")
clade_22904 = BaseTree.Clade(None, "")
clade_28998 = BaseTree.Clade(None, "")
def makeNj(score, otuName):
    """Build a Neighbor-Joining tree from a square distance matrix.

    Parameters:
        score: square list-of-lists distance matrix. It is rounded to six
            decimals and reduced IN PLACE as nodes are joined.
        otuName: list of taxon names in the row/column order of ``score``.
            Joined entries are overwritten IN PLACE with "Inner<k>" names.

    Returns:
        An unrooted ``BaseTree.Tree``.
    """
    for i in range(len(score)):
        for j in range(len(score)):
            score[i][j] = round(score[i][j], 6)
    clades = [BaseTree.Clade(None, name) for name in otuName]

    # init node distance
    node_dist = [0] * len(score)
    # init minimum index
    min_i = 0
    min_j = 0
    inner_count = 0
    while len(score) > 2:
        # calculate nodeDist
        for i in range(0, len(score)):
            node_dist[i] = 0
            for j in range(0, len(score)):
                node_dist[i] += score[i][j]
            node_dist[i] = node_dist[i] / (len(score) - 2)

        # find minimum distance pair
        min_dist = score[1][0] - node_dist[1] - node_dist[0]
        min_i = 0
        min_j = 1
        for i in range(1, len(score)):
            for j in range(0, i):
                temp = score[i][j] - node_dist[i] - node_dist[j]
                if min_dist > temp:
                    min_dist = temp
                    min_i = i
                    min_j = j
        # create clade and attach the two nodes being joined
        clade1 = clades[min_i]
        clade2 = clades[min_j]
        inner_count += 1
        inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))
        inner_clade.clades.append(clade1)
        inner_clade.clades.append(clade2)
        # assign branch length
        clade1.branch_length = (score[min_i][min_j] + node_dist[min_i] -
                                node_dist[min_j]) / 2.0
        clade2.branch_length = score[min_i][min_j] - clade1.branch_length

        # update node list
        clades[min_j] = inner_clade
        del clades[min_i]

        # rebuild distance matrix,
        # set the distances of new node at the index of min_j
        for k in range(0, len(score)):
            if k != min_i and k != min_j:
                score[min_j][k] = (score[min_i][k] + score[min_j][k] -
                                   score[min_i][min_j]) / 2.0
                score[k][min_j] = score[min_j][k]

        otuName[min_j] = "Inner" + str(inner_count)
        # drop row and column min_i
        del score[min_i]
        for i in range(len(score)):
            del score[i][min_i]

    # set the last clade as one of the child of the inner_clade
    root = None
    if clades[0] == inner_clade:
        clades[0].branch_length = 0
        clades[1].branch_length = score[1][0]
        clades[0].clades.append(clades[1])
        root = clades[0]
    else:
        clades[0].branch_length = score[1][0]
        clades[1].branch_length = 0
        clades[1].clades.append(clades[0])
        root = clades[1]

    return BaseTree.Tree(root, rooted=False)
    def _upgma(self, dm: Matrix):
        """ UPGMA.

            dm: distance matrix
        # NOTE(review): the closing triple quote of the docstring above is
        # missing from this copy of the source, as are a few statements
        # further down (marked below). The method stores its result in
        # self.tree.
        # Create tree nodes
        clades = [BaseTree.Clade(None, name) for name in dm.labels]
        clade_children_branch_length = Vector(dm.labels)
        clade_nr_of_children = Vector(dm.labels, init_value=1)
        nr_inner = 0

        # Number of nodes
        N = dm.size

        # Run until one node remains.
        while N > 1:
            # Find the index of the smallest value in the distance matrix
            argmin1, argmin2 = dm.argmin(
                only_positive=True)  # Labels belonging to the smallest value
            minpos1, minpos2 = dm.posmin(
                only_positive=True)  # Indices belonging to the smallest value
            c1, c2 = clades[minpos1], clades[
                minpos2]  # Fetch the corresponding clades

            # New inner node
            node_name = "Inner{}".format(nr_inner)
            inner_clade = BaseTree.Clade(0, node_name)

            # Calculate branch length for the two old nodes
            c1.branch_length = dm[argmin1][
                argmin2] / 2 - clade_children_branch_length[argmin1]
            c2.branch_length = dm[argmin1][
                argmin2] / 2 - clade_children_branch_length[argmin2]

            # Append these to the new node
            # NOTE(review): the statements for this step are missing.

            # Append new node
            # NOTE(review): the statement for this step is missing; the code
            # below indexes dm and both vectors by node_name, so the new label
            # presumably has to be registered here — confirm.

            # Branch length from the current node to the leaves
            clade_children_branch_length[node_name] = dm[argmin1][argmin2] / 2

            # Number of children in the new group
            clade_nr_of_children[node_name] = clade_nr_of_children[
                argmin1] + clade_nr_of_children[argmin2]

            # Calculate the distance from the new node to all the other nodes (not including the ones we want to remove).
            neighbor_labels = set(dm.labels) - {node_name, argmin1, argmin2}
            for label in neighbor_labels:
                # Either dm[argmin1][label] or dm[label][argmin1] is zero. Same with dm[argmin2][label] and dm[label][argmin2].
                # NOTE(review): the end of the w1, w2 assignment (second index
                # and closing bracket) is missing.
                w1, w2 = clade_nr_of_children[argmin1], clade_nr_of_children[
                dm[label][node_name] = \
                    ((dm[argmin1][label] + dm[label][argmin1]) * w1 + (dm[argmin2][label] + dm[label][argmin2]) * w2) / (w1 + w2)

            # Delete appropriate rows, columns and labels.
            dm.drop([argmin1, argmin2])

            # Discard the old nodes and append the new one. The old clades need to be discarded because relative position is used when referring to them!
            # NOTE(review): only the two deletions are visible; the append of
            # inner_clade to `clades` appears to be missing.
            del clades[minpos2]
            del clades[minpos1]
            nr_inner += 1

            # Number of nodes remaining
            N = dm.size

        # Create tree
        self.tree = BaseTree.Tree(clades[0], rooted=True)
# Example #17
    def nj_full_gpu(self, distance_matrix):
        # Neighbor-Joining tree construction in which the per-node distance
        # sums and the search for the minimum pair are delegated to CUDA
        # kernels. Returns an unrooted BaseTree.Tree; raises TypeError when the
        # argument is not a DistanceMatrix.
        # NOTE(review): this copy of the source has lost several lines (kernel
        # braces and closing quotes, `else:` branches, and the calls that
        # attach children to inner clades); the notes below mark each spot.
        if not isinstance(distance_matrix, DistanceMatrix):
            raise TypeError("Must provide a DistanceMatrix object.")

        # make a copy of the distance matrix to be used
        dm = copy.deepcopy(distance_matrix)
        # init terminal clades
        clades = [BaseTree.Clade(None, name) for name in dm.names]
        # init node distance
        node_dist = [0] * len(dm)
        # init minimum index
        min_i = 0
        min_j = 0
        inner_count = 0
        total_time = 0
        total_time2 = 0
        # special cases for Minimum Alignment Matrices
        if len(dm) == 1:
            root = clades[0]

            return BaseTree.Tree(root, rooted=False)
        elif len(dm) == 2:
            # minimum distance will always be [1,0]
            min_i = 1
            min_j = 0
            clade1 = clades[min_i]
            clade2 = clades[min_j]
            clade1.branch_length = dm[min_i, min_j] / 2.0
            clade2.branch_length = dm[min_i, min_j] - clade1.branch_length
            # NOTE(review): clade1/clade2 are not attached to inner_clade in
            # the visible code.
            inner_clade = BaseTree.Clade(None, "Inner")
            clades[0] = inner_clade
            root = clades[0]

            return BaseTree.Tree(root, rooted=False)

        # NOTE(review): both kernel sources below are truncated (braces,
        # `else` branches, the write to local_min and the closing triple
        # quote / parenthesis are missing).
        mod = SourceModule("""
          #include <stdio.h>
          #include <stdlib.h>
          __global__ void DeviceNodeDist(double *device_dm, double *device_node_dist, int N)
              const int tid = threadIdx.y + blockIdx.y* blockDim.y;
              if (tid >= N) return;
              for(int i = 0; i< N; i++){
                if(tid< i){
                    device_node_dist[tid] += device_dm[(i*(i+1))/2 + tid];

                    device_node_dist[tid] += device_dm[(tid*(tid+1))/2 + i];



          device_node_dist[tid]= (double)(device_node_dist[tid]/ (N-2));

        mod1 = SourceModule("""
         __global__ void findMin(double *dm, double *node_dist, long long *index_x, long long *index_y, double *local_min, int c, int l, int dm_length)
            int k = threadIdx.y + blockIdx.y*blockDim.y;

            double min_dist = 0.0;  

            int min_x =0;
            int min_y =0;
            int x = 0;
            int y = 0;

            for(int i= k*c ; i< (k+1)*c; i++){
                    for(int j=0; j<dm_length; j++){
                            int t_val = ((j+1)*(j+2))/2 ;
                            if(i < t_val){
                                y= i-(t_val-j-1);
                            }else if(i== t_val){
                                x = j+2;
                                y = 0;


                    double temp = dm[i] - (node_dist[x] + node_dist[y] );
                    if(min_dist > temp){
                        min_dist = temp;
                        min_x = x;
                        min_y = y;
            index_x[k]= min_x;
            index_y[k]= min_y;

        # print("Time taken to run SourceModule %s" % (time.time()-in_t1))
        while len(dm) > 2:

            # calculate nodeDist
            host_dm = []  # 1D list for distance matrix
            # NOTE(review): the loop body is missing (presumably it extends
            # host_dm with each row — confirm); the loop variable also shadows
            # the builtin `list`.
            for list in dm.matrix:

            host_dm = np.array(host_dm)
            # host_dm = host_dm.astype(np.float32)
            length = len(dm)
            host_node_dist = np.zeros((length,), dtype=float)
            # host_node_dist = host_node_dist.astype(np.float32)

            ###GPU code
            start = cuda.Event()
            end = cuda.Event()

            # get the optimum block size based on dataset size
            # NOTE(review): an `else:` appears to be missing before the last
            # assignment.
            if (length < 128):
                BLOCKSIZE = 128
            elif (length < 256):
                BLOCKSIZE = 256
            elif (length < 512):
                BLOCKSIZE = 512
                BLOCKSIZE = 1024

            ###Allocate GPU device memory
            device_dm = cuda.mem_alloc(host_dm.nbytes)
            device_node_dist = cuda.mem_alloc(host_node_dist.nbytes)

            ###Memcopy from host to device
            # NOTE(review): only device_dm is copied; device_node_dist is not
            # initialised from host_node_dist in the visible code although the
            # kernel accumulates into it with `+=`.
            cuda.memcpy_htod(device_dm, host_dm)

            DeviceNodeDist = mod.get_function("DeviceNodeDist")

            blockDim = (1, BLOCKSIZE, 1)
            # NOTE(review): under Python 3 `length / BLOCKSIZE + 1` is a float;
            # grid dimensions are presumably expected to be integers — confirm.
            gridDim = (1, length / BLOCKSIZE + 1, 1)


            DeviceNodeDist(device_dm, device_node_dist, np.int32(length), block=blockDim, grid=gridDim)

            # Copy the per-node distances back and overwrite the leading
            # entries of node_dist with them.
            node_dist1 = np.empty_like(host_node_dist)
            cuda.memcpy_dtoh(node_dist1, device_node_dist)
            node_dist2 = node_dist1.tolist()
            node_dist[0:len(node_dist2)] = node_dist2


            in_t2 = time.time()

            start1 = cuda.Event()
            end1 = cuda.Event()

            # Flatten rows 1..n-1 of the matrix, dropping each row's last
            # element, into one 1-D array.
            mat = dm.matrix
            dm_cpu = np.array(mat[1][:-1])
            for i in range(2, len(dm)):
                dm_cpu = np.append(dm_cpu, mat[i][:-1])

            combinations = int(((len(dm) - 1) * len(dm)) / 2)

            # NOTE(review): an `else:` appears to be missing; as written
            # block_size is always 512 when the condition holds.
            if combinations < 1024 * 128:
                block_size = int(round((len(dm)) / 2))
                block_size = 512

            local_count = int(round(combinations / block_size))
            # Per-thread outputs of findMin.
            index_x = np.zeros(block_size, dtype=int)
            index_y = np.zeros(block_size, dtype=int)
            min_val = np.zeros(block_size, dtype=float)

            local_min_array_gpu = cuda.mem_alloc(dm_cpu.nbytes)
            local_index_gpux = cuda.mem_alloc(index_x.nbytes)
            local_index_gpuy = cuda.mem_alloc(index_y.nbytes)
            local_min_gpu = cuda.mem_alloc(min_val.nbytes)

            cuda.memcpy_htod(local_min_array_gpu, dm_cpu)

            func = mod1.get_function("findMin")
            func(local_min_array_gpu, device_node_dist, local_index_gpux, local_index_gpuy, local_min_gpu,
                 np.int32(local_count), np.int32(len(dm_cpu)), np.int32(len(dm)),
                 block=(1, block_size, 1))

            cuda.memcpy_dtoh(min_val, local_min_gpu)
            cuda.memcpy_dtoh(index_x, local_index_gpux)
            cuda.memcpy_dtoh(index_y, local_index_gpuy)

            min_val_new = min_val.tolist()


            # Reduce the per-thread minima on the host; on ties the last
            # matching thread's indices are kept.
            min_dist = min(min_val_new)

            for i in range(len(min_val)):
                if min_dist == min_val[i]:
                    min_i = index_x[i]
                    min_j = index_y[i]

            del host_dm
            del host_node_dist
            del dm_cpu

            total_time2 += time.time() - in_t2

            # create clade
            # NOTE(review): clade1/clade2 are never appended to
            # inner_clade.clades in the visible code.
            clade1 = clades[min_i]
            clade2 = clades[min_j]
            inner_count += 1
            inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))
            # assign branch length
            clade1.branch_length = (dm[min_i, min_j] + node_dist[min_i] -
                                    node_dist[min_j]) / 2.0
            clade2.branch_length = dm[min_i, min_j] - clade1.branch_length

            # update node list
            clades[min_j] = inner_clade
            del clades[min_i]

            # rebuild distance matrix,
            # set the distances of new node at the index of min_j
            for k in range(0, len(dm)):
                if k != min_i and k != min_j:
                    dm[min_j, k] = (dm[min_i, k] + dm[min_j, k] -
                                    dm[min_i, min_j]) / 2.0

            dm.names[min_j] = "Inner" + str(inner_count)
            del dm[min_i]

        #print("Time taken for min dist node calculation= %s" % total_time2)
        # set the last clade as one of the child of the inner_clade
        # NOTE(review): the `else:` and the statements that attach the
        # remaining clade to the root appear to be missing; as written the
        # second group of assignments always overrides the first.
        root = None
        if clades[0] == inner_clade:
            clades[0].branch_length = 0
            clades[1].branch_length = dm[1, 0]
            root = clades[0]
            clades[0].branch_length = dm[1, 0]
            clades[1].branch_length = 0
            root = clades[1]

        return BaseTree.Tree(root, rooted=False)
Пример #18
    # Build a tree from `names` and `matrix` by repeatedly joining the pair of
    # clades with the smallest Q-matrix entry; the result is stored in
    # self.tree and returned.
    # NOTE(review): this listing looks truncated in several places (flagged
    # below), so it does not parse as written -- confirm against the original.
    def create_tree(self, names, matrix):
        # With no names or no matrix there is nothing to build; return the
        # tree already held on the instance.
        if not names or not matrix:
            return self.tree

        distance_matrix = DistanceMatrix(names, matrix)
        # Work on a copy: dm is mutated (rows rewritten and deleted) below.
        dm = copy.deepcopy(distance_matrix)
        # One leaf clade per name, in the same order as dm.names.
        clades = [BaseTree.Clade(None, name) for name in dm.names]
        # clades[0].clades.append(clades[2])
        while len(clades) != 1:
            q_matrix = []
            q_names = dm.names
            # NOTE(review): the body of the `if ind_i == ind_j` branch, the
            # tail of the tmp.append(...) expression and the statement that
            # adds `tmp` to q_matrix are not present in this listing.
            for ind_i, i in enumerate(dm.matrix):
                tmp = []
                for ind_j, j in enumerate(i):
                    if ind_i == ind_j:
                    tmp.append((len(dm) - 2) * j - sum(dm[ind_i]) -
            q_matrix = DistanceMatrix(q_names, q_matrix)

            # Scan q_matrix for its smallest entry and remember its row and
            # column; the strict `<` keeps the first minimum encountered.
            min_i = float('Inf')
            min_j = float('Inf')
            q_min = float('Inf')
            for ind_i, i in enumerate(q_matrix):
                for ind_j, j in enumerate(i):
                    if j < q_min:
                        q_min = j
                        min_i = ind_i
                        min_j = ind_j

            # Two clades left: give one of them the remaining distance as its
            # branch length and drop it from the list.
            # NOTE(review): as listed, execution falls through to the dist_i
            # computation below, where (len(dm) - 2) would be 0 at this point;
            # presumably a statement leaving the loop is missing -- confirm.
            if len(clades) == 2:
                # print('c:', clade_j)
                if min_i == 0:
                    clade_j = clades[min_j]
                    clade_j.branch_length = dm[min_i][min_j]
                    del clades[min_j]
                if min_i == 1:
                    clade_i = clades[min_i]
                    clade_i.branch_length = dm[min_i][min_j]
                    del clades[min_i]

            # Branch lengths from the joined pair to their new parent: half the
            # pair distance, corrected by the difference of the two row sums.
            dist_i = 0.5 * dm[min_i][min_j] + (
                sum(dm[min_i]) - sum(dm[min_j])) / (2 * (len(dm) - 2))
            dist_j = dm[min_i][min_j] - dist_i

            clade_i = clades[min_i]
            clade_j = clades[min_j]
            clade_i.branch_length = dist_i
            clade_j.branch_length = dist_j

            # The new inner clade is named by concatenating the two names.
            # NOTE(review): no visible statement attaches clade_i / clade_j to
            # tmp_clade as children.
            tmp_clade = BaseTree.Clade(None, dm.names[min_i] + dm.names[min_j])

            # The merged clade takes slot min_j; slot min_i is removed.
            clades[min_j] = tmp_clade
            del clades[min_i]
            # print(clades)

            # Distances from the merged node to every remaining node.
            # NOTE(review): the body of the `if` below and the call that
            # consumes the 0.5 * (...) expression are missing from this listing.
            tmp_dist = []
            for k in range(len(dm)):
                if k == min_j or k == 0:
                    0.5 * (dm[min_i][k] + dm[min_j][k] - dm[min_i][min_j]))
            dm[min_j] = tmp_dist

            # Keep dm in step with `clades`: rename slot min_j, drop min_i.
            dm.names[min_j] = dm.names[min_i] + dm.names[min_j]
            del dm[min_i]

        # The single remaining clade becomes the root of an unrooted tree.
        self.tree = BaseTree.Tree(clades[0], rooted=False)
        # print('res: ', BaseTree.Tree(clades[0], rooted = False))
        return self.tree
Пример #19
    # Cluster the entries of `distance_matrix` bottom-up: on every pass the
    # closest pair is merged into a new "Inner<N>" clade until one entry is
    # left, then the last inner clade is returned wrapped in a BaseTree.Tree.
    # NOTE(review): this listing looks truncated (flagged below) and does not
    # parse as written -- confirm against the original source.
    def upgma(self, distance_matrix):

        # make a copy of the distance matrix to be used
        dm = copy.deepcopy(distance_matrix)
        # dm_count has the same layout as dm; every below-diagonal cell is set
        # to 1 and later used as the weight of that cell when averaging.
        dm_count = copy.deepcopy(dm)
        for i in range(1, len(dm_count)):
            for j in range(0, i):
                dm_count[i, j] = 1

        # init terminal clades
        clades = [BaseTree.Clade(None, name) for name in dm.names]

        # init minimum index
        min_i = 0
        min_j = 0
        inner_count = 0

        while len(dm) > 1:
            min_dist = dm[1, 0]
            # find minimum index

            # Scan the lower triangle; `>=` means the last of several equal
            # minima is the one selected.
            mintime = time.time()
            for i in range(1, len(dm)):
                for j in range(0, i):
                    if min_dist >= dm[i, j]:
                        min_dist = dm[i, j]
                        min_i = i
                        min_j = j

            mintime2 = time.time()

            # NOTE(review): the left-hand side of the next statement is missing
            # in this listing; presumably it accumulates the time spent in the
            # minimum search into some counter -- confirm the target name.
   += mintime2 - mintime

            # create clade
            clade1 = clades[min_i]
            clade2 = clades[min_j]
            inner_count += 1
            # NOTE(review): no visible statement attaches clade1 / clade2 to
            # inner_clade as children.
            inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))

            # assign branch length
            # NOTE(review): an `else:` appears to be missing before the second
            # assignment in each of the two `if` blocks below; as listed, the
            # second assignment would overwrite the first.
            if clade1.is_terminal():
                clade1.branch_length = min_dist * 1.0 / 2
                clade1.branch_length = min_dist * \
                                       1.0 / 2 - self._height_of(clade1)

            if clade2.is_terminal():
                clade2.branch_length = min_dist * 1.0 / 2
                clade2.branch_length = min_dist * \
                                       1.0 / 2 - self._height_of(clade2)

            # update node list
            clades[min_j] = inner_clade
            del clades[min_i]

            # rebuild distance matrix,
            # set the distances of new node at the index of min_j
            for k in range(0, len(dm)):
                r = 0
                if k != min_i and k != min_j:
                    # New distance is the average of the two old distances,
                    # weighted by their dm_count entries; the combined weight
                    # r is stored back for later merges.
                    r = dm_count[min_i, k] + dm_count[min_j, k]
                    dm[min_j, k] = ((dm[min_i, k] * dm_count[min_i, k]) +
                                    (dm[min_j, k] * dm_count[min_j, k])) / r
                    dm_count[min_j, k] = r

            # Keep dm_count and dm aligned: rename slot min_j, delete min_i.
            dm_count.names[min_j] = "Inner" + str(inner_count)
            del dm_count[min_i]

            dm.names[min_j] = "Inner" + str(inner_count)

            del dm[min_i]
            # Reset on every pass, so the clade created last (the root) ends
            # with branch length 0 unless a later pass reassigns it.
            inner_clade.branch_length = 0

        # NOTE(review): inner_clade is only bound inside the loop, so a matrix
        # with a single entry would reach this line with the name undefined.
        return BaseTree.Tree(inner_clade)
Пример #20
    # Read the pipe-delimited ITIS tables found under self.data_dir, create one
    # BaseTree.Clade per accepted/valid taxonomic unit and write the resulting
    # tree to `tree_filename` in `tree_format` via bp.write.
    # NOTE(review): Python 2 code (print statements, dict.iteritems). Several
    # statements are missing from this listing (flagged below), so it does not
    # parse as written -- confirm against the original source.
    def main(self, tree_filename, tree_format='newick'):
        col_delimiter = '|'
        # NOTE(review): the URL is an empty string here; presumably the real
        # archive address was removed from the listing.
        url = ''

        # download the taxonomy archive
        filename = self.download_file(url)

        # extract the tables
        # NOTE(review): the tuple of table names is cut off, the `else:` between
        # the two prints is missing, the call that opens the archive has lost
        # its callee, the list comprehension is not closed and the assignment
        # target before `= extract` is missing.
        for extract in ('taxonomic_units', 'longnames', 'synonym_links',
            if os.path.exists(os.path.join(self.data_dir, extract)):
                print 'Using existing copy of %s' % extract
                print 'Extracting %s from %s...' % (extract, filename)
                archive =, mode='r:gz')
                full_extract = [
                    x for x in archive.getnames()
                    if x.split('/')[-1] == extract
                member = archive.getmember(full_extract)
       = extract
                archive.extract(extract, path=self.data_dir)

        # get names for all ITIS TSNs from longnames table
        print 'Getting names...'
        names = {}
        with open(os.path.join(self.data_dir, 'longnames')) as names_file:
            for line in names_file:
                line = line.strip()
                values = line.split(col_delimiter)
                # Each row must split into exactly two fields, otherwise the
                # unpacking below raises ValueError.
                tax_id, name = values
                names[tax_id] = name

        # read all node info from taxonomic_units
        print 'Reading taxonomy...'
        nodes = {}
        with open(os.path.join(self.data_dir,
                               'taxonomic_units')) as nodes_file:
            for line in nodes_file:
                line = line.strip()
                values = line.split(col_delimiter)

                # Only columns 0, 10, 17 and 23 of each row are used.
                (tax_id, usage, parent_id,
                 uncertain_parent) = [values[n] for n in (0, 10, 17, 23)]

                #if uncertain_parent: continue
                if not usage in ('accepted', 'valid'): continue

                name = names[tax_id]
                this_node = BaseTree.Clade(name=name)
                nodes[tax_id] = this_node
                # The parent id is parked on the clade until the tree is
                # assembled below, where it is deleted again.
                this_node.parent_id = parent_id

        other_names = defaultdict(set)
        if tree_format == 'cdao':
            # get synonym definitions
            print 'Getting synonyms...'
            with open(os.path.join(self.data_dir,
                                   'synonym_links')) as synonym_file:
                for line in synonym_file:
                    line = line.strip()
                    values = line.split(col_delimiter)
                    node_id, syn_id, _ = values
                    # Synonyms are stored as tagged tuples, not as clades.
                    nodes[node_id] = ('synonym', names[node_id], syn_id)
            # NOTE(review): the vernaculars loop parses tax_id and name but no
            # visible statement stores them (other_names is never filled here).
            with open(os.path.join(self.data_dir,
                                   'vernaculars')) as synonym_file:
                for line in synonym_file:
                    line = line.strip()
                    values = line.split(col_delimiter)
                    tax_id, name = values[:2]

        print 'Found %s OTUs.' % len(nodes)
        # An empty clade stored under key '0' serves as the root.
        nodes['0'] = root_node = BaseTree.Clade()

        # create tree from nodes dictionary
        print 'Building tree...'
        for node_id, this_node in nodes.iteritems():
            if node_id == '0': continue

            # NOTE(review): in both branches below the `try:` lines, the bodies
            # of the `except` clauses, the statement attaching this_node to
            # parent_node and the body of the `for name in other_names[...]`
            # loop are missing from this listing.
            if isinstance(this_node, BaseTree.Clade):
                    parent_node = nodes[this_node.parent_id]

                except (KeyError, AttributeError):

                del this_node.parent_id

                if not hasattr(this_node, 'tu_attributes'):
                    this_node.tu_attributes = []
                for name in other_names[node_id]:

            elif this_node[0] == 'synonym':
                _, name, syn_id = this_node
                    accepted_node = nodes[syn_id]
                except KeyError:

                # A synonym whose target is not a clade is ignored.
                if not isinstance(accepted_node, BaseTree.Clade): continue

                if not hasattr(accepted_node, 'tu_attributes'):
                    nodes[syn_id].tu_attributes = []
                #print 'Synonym: %s -> %s' % (name, nodes[syn_id].name)

        tree = BaseTree.Tree(root=root_node)

        # write tree to file
        print 'Writing %s tree to %s...' % (tree_format, tree_filename)
        bp.write([tree], tree_filename, tree_format)

        print 'Done!' ''
Пример #21
    # Read the tab-delimited 'taxon.txt' table found under self.data_dir,
    # create one BaseTree.Clade per non-synonym row and write the resulting
    # tree to `tree_filename` in `tree_format` via bp.write.
    # NOTE(review): Python 2 code (print statements, dict.iteritems). Several
    # statements are missing from this listing (flagged below), so it does not
    # parse as written -- confirm against the original source.
    def main(self, tree_filename, tree_format='newick'):
        col_delimiter = '\t'
        # NOTE(review): the URL is an empty string here; presumably the real
        # archive address was removed from the listing.
        url = ''

        # download the taxonomy archive
        filename = self.download_file(url)

        # extract the tables
        extract = 'taxon.txt'
        # NOTE(review): an `else:` appears to be missing after the first print;
        # as listed the archive would be extracted even when a copy exists.
        if os.path.exists(os.path.join(self.data_dir, extract)):
            print 'Using existing copy of %s' % extract
            print 'Extracting %s from %s...' % (extract, filename)
            archive = zipfile.ZipFile(filename, mode='r')
            archive.extract(extract, path=self.data_dir)

        # build BioPython clades
        print 'Reading taxonomy...'
        nodes = {}
        with open(os.path.join(self.data_dir, 'taxon.txt')) as taxonomy_file:
            for line in taxonomy_file:
                line = line.strip()
                values = line.split(col_delimiter)
                # Only the first seven columns are used; a row with fewer
                # columns makes this unpacking raise ValueError.
                id, parent_id, syn_id, _, name, _, status = values[:7]

                # skip incertae sedis taxa
                if id == '0': continue

                # NOTE(review): the body of the first branch is missing from
                # this listing.
                if syn_id and not 'synonym' in status:
                elif syn_id and 'synonym' in status:
                    # Synonyms are only recorded when writing CDAO output, as
                    # tagged tuples rather than clades.
                    if tree_format == 'cdao':
                        nodes[id] = ('synonym', name, syn_id)
                elif not syn_id:
                    nodes[id] = BaseTree.Clade(name=name)
                    # The parent id is parked on the clade until the tree is
                    # assembled below.
                    nodes[id].parent_id = parent_id

        print 'Found %s OTUs.' % len(nodes)
        # An empty clade stored under the empty-string key serves as the root.
        nodes[''] = root_node = BaseTree.Clade()

        # create tree from nodes dictionary
        print 'Building tree...'
        for node_id, this_node in nodes.iteritems():
            if not node_id: continue

            # NOTE(review): in both branches below the `try:` lines, the bodies
            # of the `except` clauses and the statements that attach a node to
            # its parent or record a synonym are missing from this listing.
            if isinstance(this_node, BaseTree.Clade):
                    parent_node = nodes[this_node.parent_id]
                    del this_node.parent_id
                except (KeyError, AttributeError):

            elif this_node[0] == 'synonym':
                _, name, syn_id = this_node
                    accepted_node = nodes[syn_id]
                except KeyError:

                # A synonym whose target is not a clade is ignored.
                if not isinstance(accepted_node, BaseTree.Clade): continue

                if not hasattr(accepted_node, 'tu_attributes'):
                    nodes[syn_id].tu_attributes = []
                #print 'Synonym: %s -> %s' % (name, nodes[syn_id].name)

        tree = BaseTree.Tree(root=root_node)

        # write tree to file
        print 'Writing %s tree to %s...' % (tree_format, tree_filename)
        bp.write([tree], tree_filename, tree_format)

        print 'Done!' ''
    def _neigbor_joining(self, dm: Matrix):
        """Build a neighbor-joining tree from ``dm`` and store it in ``self.tree``.

        The two labels with the smallest corrected distance are repeatedly
        joined under a new ``Inner<k>`` clade until only two nodes remain;
        the remaining node is then attached to the last inner clade, which
        becomes the root of the tree.

        dm: distance matrix. It is modified in place: joined labels are
            dropped and a column for every new inner node is written.

        Raises:
            ValueError: if ``dm`` holds fewer than three labels, because no
                inner node would ever be created to act as the root.
        """
        # Create tree nodes
        clades = [BaseTree.Clade(None, name) for name in dm.labels]
        nr_inner = 0

        N = dm.size
        if N < 3:
            # Without this guard the final statements would fail on an
            # unbound ``inner_clade``.
            raise ValueError("Neighbor joining needs at least three labels.")

        # Run until two nodes remain.
        while N > 2:
            labels = dm.labels
            # Divergence
            divergence = Vector(labels)
            for label in labels:
                # Sum up all the distances for the given label (row and
                # column, so either triangle of the matrix may be filled).
                divergence[label] = sum([dm[label][i] for i in labels]) + sum(
                    [dm[i][label] for i in labels])

            # New distance matrix
            dm_new = Matrix(labels)
            for label1, label2 in itertools.combinations(labels, 2):
                dm_new[label1][label2] = dm[label1][label2] - (
                    divergence[label1] + divergence[label2]) / (N - 2)

            # Find the index of the smallest value in the distance matrix
            argmin1, argmin2 = dm_new.argmin(
            )  # Labels belonging to the smallest value
            minpos1, minpos2 = dm_new.posmin(
            )  # Indices belonging to the smallest value
            c1, c2 = clades[minpos1], clades[
                minpos2]  # Fetch the corresponding clades

            # New inner node
            node_name = "Inner{}".format(nr_inner)
            inner_clade = BaseTree.Clade(None, node_name)

            # Calculate branch length for the two old nodes
            c1.branch_length = dm[argmin1][argmin2] / 2 + (
                divergence[argmin1] - divergence[argmin2]) / (2 * (N - 2))
            c2.branch_length = dm[argmin1][argmin2] - c1.branch_length

            # Append these to the new node
            inner_clade.clades.append(c1)
            inner_clade.clades.append(c2)

            # Calculate the distance from the new node to all the other nodes (not including the ones we want to remove).

            neighbor_labels = set(labels) - {node_name, argmin1, argmin2}
            for label in neighbor_labels:
                dm[label][node_name] = (
                    dm[argmin1][label] + dm[label][argmin1] +
                    dm[argmin2][label] + dm[label][argmin2] -
                    dm[argmin1][argmin2]) / 2

            # Delete appropriate rows, columns and labels.
            dm.drop([argmin1, argmin2])

            # Discard the old nodes and append the new one. Delete the higher
            # position first so the lower one is still valid afterwards,
            # whichever order posmin() reported them in.
            for position in sorted((minpos1, minpos2), reverse=True):
                del clades[position]
            clades.append(inner_clade)
            nr_inner += 1

            # Number of nodes remaining
            N = dm.size

        # Join last two nodes (inner_clade is clade[1])
        argmin1, argmin2 = dm.argmax()  # Labels
        clades[0].branch_length = dm[argmin1][
            argmin2]  # Distance is the branch length
        inner_clade.clades.append(clades[0])

        # Create tree
        self.tree = BaseTree.Tree(inner_clade, rooted=True)
Пример #23
# Variant of majority-rule consensus that can also consume MCMC output
# (`mcmc=True`, items are (tree, count) tuples) and is meant to produce up to
# `n` consensus trees; it returns a single tree when n == 1 and the list of
# trees otherwise.
# NOTE(review): this listing is truncated in many places (the docstring is not
# closed and several `else:` / loop-control / append statements are absent), so
# it does not parse as written -- confirm against the original source.
def majority_consensus(trees, cutoff=0, mcmc=False, n=1):
    """Search majority rule consensus tree from multiple trees.

    This is a extend majority rule method, which means the you can set any
    cutoff between 0 ~ 1 instead of 0.5. The default value of cutoff is 0 to
    create a relaxed binary consensus tree in any condition (as long as one of
    the provided trees is a binary tree). The branch length of each consensus
    clade in the result consensus tree is the average length of all counts for
    that clade.

        trees : iterable
            iterable of trees to produce consensus tree or a list of tuples
            output of mcmc if mcmc=True, tuples like (tree, number of occurences in MCMC)

        cutoff : float
            Must be between 0 and 1. cutoff=0.5 means, that all clades in the consensus tree
            must occur in at least 50% of trees, cutoff=1 is the same as strict consensus.

        mcmc : Boolean
            True if parameter trees is a tuple, output of mcmc

        n : integer
            Maximum number of best consensus trees returned - if the number is too big,
            it may be impossible to produce that many different consensus tree.

    if not (0 <= cutoff <= 1):
        raise ValueError("Cutoff must be a number between 0 and 1")
    # Peek at the first item to learn the terminals, then chain it back in
    # front of the iterator so it is still counted.
    tree_iter = iter(trees)
    first_tree = next(tree_iter)
    # NOTE(review): the `else:` separating the mcmc and non-mcmc paths is
    # missing, and the expression inside both term_names comprehensions has
    # been lost.
    if mcmc:
        terms = first_tree[0].get_terminals()
        term_names = [ for term in terms]
        bitstr_counts, tree_count = _count_clades_mcmc(itertools.chain([first_tree], tree_iter), term_names)
        terms = first_tree.get_terminals()
        term_names = [ for term in terms]
        bitstr_counts, tree_count = _count_clades(itertools.chain([first_tree], tree_iter), term_names)
    # Sort bitstrs by descending #occurrences, then #tips, then tip order
    # NOTE(review): the iterable passed to sorted() and the end of the call are
    # missing from this listing.
    bitstrs = sorted(
        key=lambda bitstr: (bitstr_counts[bitstr][0], bitstr.count("1"), str(bitstr)),
    # The first bitstring must cover every terminal of the first tree.
    if not bitstrs[0].count("1") == len(terms):
        raise ValueError("Taxons in provided trees should be consistent")
    # Make a bitstr-to-clades dict and store root clade
    # create inner clades
    # NOTE(review): nothing visible puts an initial entry into possible_starts,
    # adds to clades_used or appends to consensus_trees; as listed the loop
    # below would never run and consensus_trees[0] would raise IndexError.
    possible_starts = queue.Queue()
    clades_used = set()
    consensus_trees = []
    # we will try to produce n different consensus trees, starting with bitstrings
    # that were not compatible with previous trees
    while len(consensus_trees) < n and not possible_starts.empty():
        root = BaseTree.Clade()

        bitstr_clades = {bitstrs[0]: root}
        new_start = possible_starts.get()
        new_start_appeared = 0
        # Candidates are tried in the order: queued start, then the remaining
        # bitstrings from the sorted list.
        # NOTE(review): the bodies of several `if` statements in this loop
        # (presumably `continue` / `break`) are missing from this listing.
        for bitstr in itertools.chain(new_start, bitstrs[1:]):
            if bitstr == new_start[0]:
                new_start_appeared += 1
                if new_start_appeared > 1:
            if bitstr == bitstrs[0]:
            # apply majority rule
            count_in_trees, branch_length_sum = bitstr_counts[bitstr]
            # Confidence is the fraction of trees containing this clade.
            confidence = count_in_trees / tree_count
            if confidence < cutoff:
            clade_terms = [terms[i] for i in bitstr.index_one()]
            clade = BaseTree.Clade()
            clade.confidence = confidence
            # Branch length is the mean over the trees containing the clade.
            clade.branch_length = branch_length_sum / count_in_trees
            bsckeys = sorted(bitstr_clades, key=lambda bs: bs.count("1"), reverse=True)

            # check if current clade is compatible with previous clades and
            # record it's possible parent and child clades.
            compatible = True
            parent_bitstr = None
            child_bitstrs = []  # multiple independent childs
            for bs in bsckeys:
                if not bs.iscompatible(bitstr):
                    if bitstr not in clades_used:
                    compatible = False
                # assign the closest ancestor as its parent
                # as bsckeys is sorted, it should be the last one
                if bs.contains(bitstr):
                    parent_bitstr = bs
                # assign the closest descendant as its child
                # the largest and independent clades
                if (
                    and bs != bitstr
                    and all(c.independent(bs) for c in child_bitstrs)
            if not compatible:

            if parent_bitstr:
                # insert current clade; remove old bitstring
                parent_clade = bitstr_clades.pop(parent_bitstr)
                # update parent clade childs
                parent_clade.clades = [
                    c for c in parent_clade.clades if c not in clade_terms
                # set current clade as child of parent_clade
                # update bitstring
                # parent = parent ^ bitstr
                # update clade
                bitstr_clades[parent_bitstr] = parent_clade

            if child_bitstrs:
                remove_list = []
                for c in child_bitstrs:
                    child_clade = bitstr_clades[c]
                remove_terms = [terms[i] for i in remove_list]
                clade.clades = [c for c in clade.clades if c not in remove_terms]
            # put new clade
            bitstr_clades[bitstr] = clade
    # A single tree is returned bare; otherwise the whole list is returned.
    if n == 1:
        return consensus_trees[0]
    return consensus_trees
Пример #24
    # Read 'names.dmp' and 'nodes.dmp' found under self.data_dir, create one
    # BaseTree.Clade per row of nodes.dmp and write the resulting tree to
    # `tree_filename` in `tree_format` via bp.write. When `ids` is truthy the
    # clades are meant to be named by tax id instead of scientific name.
    # NOTE(review): Python 2 code (print statements, dict.iteritems). Several
    # statements are missing from this listing (flagged below), so it does not
    # parse as written -- confirm against the original source.
    def main(self, tree_filename, tree_format='newick', ids=None):
        col_delimiter = '\t|\t'
        row_delimiter = '\t|\n'
        # NOTE(review): the URL is an empty string here; presumably the real
        # archive address was removed from the listing.
        url = ''

        # download the taxonomy archive
        filename = self.download_file(url)

        # extract the text dump
        # NOTE(review): the `else:` between the two prints is missing and the
        # call that opens the archive has lost its callee.
        for extract in ('nodes.dmp', 'names.dmp'):
            if os.path.exists(os.path.join(self.data_dir, extract)):
                print 'Using existing copy of %s' % extract
                print 'Extracting %s from %s...' % (extract, filename)
                archive =, mode='r:gz')
                archive.extract(extract, path=self.data_dir)

        # get names for all tax_ids from names.dmp
        print 'Getting names...'
        scientific_names = {}
        other_names = defaultdict(set)
        with open(os.path.join(self.data_dir, 'names.dmp')) as names_file:
            for line in names_file:
                # str.rstrip takes a set of characters, so this removes any
                # trailing run of tabs, pipes and newlines, not just the exact
                # row_delimiter sequence.
                line = line.rstrip(row_delimiter)
                values = line.split(col_delimiter)
                tax_id, name_txt, _, name_type = values[:4]
                # NOTE(review): only scientific names are stored in the visible
                # code; no statement fills other_names.
                if name_type == 'scientific name':
                    scientific_names[tax_id] = name_txt

        # read all node info from nodes.dmp
        print 'Reading taxonomy...'
        nodes = {}
        with open(os.path.join(self.data_dir, 'nodes.dmp')) as nodes_file:
            for line in nodes_file:
                line = line.rstrip(row_delimiter)
                values = line.split(col_delimiter)
                tax_id, parent_id = values[:2]
                # NOTE(review): an `else:` appears to be missing before the
                # second assignment; as listed the scientific-name clade would
                # always replace the id-named one.
                if ids:
                    this_node = BaseTree.Clade(name=tax_id)
                    this_node = BaseTree.Clade(name=scientific_names[tax_id])

                nodes[tax_id] = this_node
                # The parent id is parked on the clade until the tree is
                # assembled below, where it is deleted again.
                this_node.parent_id = parent_id

                if tree_format == 'cdao':
                    # add common names, synonyms, mispellings, etc. as skos:altLabels
                    if not hasattr(this_node, 'tu_attributes'):
                        this_node.tu_attributes = []
                    # NOTE(review): the body of this loop is missing from the
                    # listing.
                    for x in other_names[tax_id]:

        print 'Found %s OTUs.' % len(nodes)

        # create tree from nodes dictionary
        print 'Building tree...'
        for node_id, this_node in nodes.iteritems():
            # A node that names itself as its parent is taken as the root.
            # NOTE(review): the `else:` before the parent lookup and the
            # statement attaching this_node to parent_node are missing.
            if node_id == this_node.parent_id:
                root_node = this_node
                print 'Found root.'
                parent_node = nodes[this_node.parent_id]

            del this_node.parent_id

        tree = BaseTree.Tree(root=root_node)

        # write tree to file
        print 'Writing %s tree to %s...' % (tree_format, tree_filename)
        bp.write([tree], tree_filename, tree_format)

        print 'Done!'
Пример #25
def majority_consensus(trees, cutoff=0):
    """Search majority rule consensus tree from multiple trees.

    This is an extended majority rule method, which means that you can set
    any cutoff between 0 ~ 1 instead of 0.5. The default value of cutoff is 0
    to create a relaxed binary consensus tree in any condition (as long as one
    of the provided trees is a binary tree). The branch length of each
    consensus clade in the result consensus tree is the average length of all
    counts for that clade.

    :Parameters:
        trees : iterable
            iterable of trees to produce consensus tree.
        cutoff : float
            fraction (0 ~ 1) of trees a clade must occur in to be kept.

    :Returns: a ``BaseTree.Tree`` whose inner clades carry ``confidence``
        (percentage of trees containing the clade) and the averaged
        ``branch_length``.

    :Raises: ``ValueError`` if the most frequent bitstring does not cover all
        terminals of the first tree, i.e. the trees disagree on their taxa.
    """
    # Peek at the first tree for the terminals, then chain it back in front
    # of the iterator so it is still counted.
    tree_iter = iter(trees)
    first_tree = next(tree_iter)

    terms = first_tree.get_terminals()
    bitstr_counts, tree_count = _count_clades(
        itertools.chain([first_tree], tree_iter))

    # Sort bitstrs by descending #occurrences, then #tips, then tip order
    bitstrs = sorted(
        bitstr_counts.keys(),
        key=lambda bitstr:
        (bitstr_counts[bitstr][0], bitstr.count('1'), str(bitstr)),
        reverse=True)
    root = BaseTree.Clade()
    if bitstrs[0].count('1') == len(terms):
        # The all-ones bitstring is the root: start with every terminal
        # hanging directly off it.
        root.clades.extend(terms)
    else:
        raise ValueError('Taxons in provided trees should be consistent')
    # Make a bitstr-to-clades dict and store root clade
    bitstr_clades = {bitstrs[0]: root}
    # create inner clades
    for bitstr in bitstrs[1:]:
        # apply majority rule
        count_in_trees, branch_length_sum = bitstr_counts[bitstr]
        confidence = 100.0 * count_in_trees / tree_count
        if confidence < cutoff * 100.0:
            # bitstrs is sorted by count, so nothing later can pass either.
            break
        clade_terms = [terms[i] for i in bitstr.index_one()]
        clade = BaseTree.Clade()
        clade.clades.extend(clade_terms)
        clade.confidence = confidence
        clade.branch_length = branch_length_sum / count_in_trees
        bsckeys = sorted(bitstr_clades,
                         key=lambda bs: bs.count('1'),
                         reverse=True)

        # check if current clade is compatible with previous clades and
        # record it's possible parent and child clades.
        compatible = True
        parent_bitstr = None
        child_bitstrs = []  # multiple independent childs
        for bs in bsckeys:
            if not bs.iscompatible(bitstr):
                compatible = False
                break
            # assign the closest ancestor as its parent
            # as bsckeys is sorted, it should be the last one
            if bs.contains(bitstr):
                parent_bitstr = bs
            # assign the closest descendant as its child
            # the largest and independent clades
            if (bitstr.contains(bs) and bs != bitstr
                    and all(c.independent(bs) for c in child_bitstrs)):
                child_bitstrs.append(bs)
        if not compatible:
            continue

        if parent_bitstr:
            # insert current clade; remove old bitstring
            parent_clade = bitstr_clades.pop(parent_bitstr)
            # update parent clade childs
            parent_clade.clades = [
                c for c in parent_clade.clades if c not in clade_terms
            ]
            # set current clade as child of parent_clade
            parent_clade.clades.append(clade)
            # update bitstring
            # parent = parent ^ bitstr
            # update clade
            bitstr_clades[parent_bitstr] = parent_clade

        if child_bitstrs:
            remove_list = []
            for c in child_bitstrs:
                remove_list.extend(c.index_one())
                child_clade = bitstr_clades[c]
                # Move the already-built child from the parent to the new
                # clade that now sits between them.
                parent_clade.clades.remove(child_clade)
                clade.clades.append(child_clade)
            remove_terms = [terms[i] for i in remove_list]
            clade.clades = [c for c in clade.clades if c not in remove_terms]
        # put new clade
        bitstr_clades[bitstr] = clade
        # Stop once the tree is fully resolved (binary, rooted or unrooted).
        if ((len(bitstr_clades) == len(terms) - 1) or
            (len(bitstr_clades) == len(terms) - 2 and len(root.clades) == 3)):
            break
    return BaseTree.Tree(root=root)
Пример #26
# Script fragment: create one leaf clade per entry of `names`, then create
# three inner clades in turn and wrap the last one in a rooted tree.
# NOTE(review): `names` and `BaseTree` are not defined in this fragment and
# must come from elsewhere. No visible statement attaches any clade to an
# inner clade, so as listed the final tree holds only the childless "Inner2"
# node -- presumably the attaching statements were lost; confirm.
clades = [BaseTree.Clade(None, name) for name in names]
# clade1 / clade2 are bound here but not used again in the visible code.
clade1 = clades[0]
clade2 = clades[1]

inner_clade = BaseTree.Clade(None, "Inner0")

# Remove the first two clades, higher index first so index 0 stays valid.
del clades[1]
del clades[0]

# Rebinding inner_clade drops the only reference to "Inner0".
inner_clade = BaseTree.Clade(None, "Inner1")
# Needs at least two clades left, i.e. `names` must hold four or more entries,
# otherwise these deletes raise IndexError.
del clades[1]
del clades[0]

inner_clade = BaseTree.Clade(None, "Inner2")
tree = BaseTree.Tree(inner_clade, rooted=True)


Пример #27
    def nj(self, distance_matrix):
        """Construct and return a Neighbor Joining tree.

        :Parameters:
            distance_matrix : DistanceMatrix
                The distance matrix for tree construction.

        :Returns: an unrooted ``BaseTree.Tree``. Inner nodes are named
            ``Inner<k>`` in the order they are created (plain ``Inner`` for a
            two-taxon matrix).

        :Raises: ``TypeError`` if ``distance_matrix`` is not a
            ``DistanceMatrix``.
        """
        if not isinstance(distance_matrix, DistanceMatrix):
            raise TypeError("Must provide a DistanceMatrix object.")

        # make a copy of the distance matrix to be used
        dm = copy.deepcopy(distance_matrix)
        # init terminal clades
        clades = [BaseTree.Clade(None, name) for name in dm.names]
        # init node distance
        node_dist = [0] * len(dm)
        # init minimum index
        min_i = 0
        min_j = 0
        inner_count = 0
        # special cases for Minimum Alignment Matrices
        if len(dm) == 1:
            root = clades[0]

            return BaseTree.Tree(root, rooted=False)
        elif len(dm) == 2:
            # minimum distance will always be [1,0]
            min_i = 1
            min_j = 0
            clade1 = clades[min_i]
            clade2 = clades[min_j]
            # Split the single distance evenly between the two leaves.
            clade1.branch_length = dm[min_i, min_j] / 2.0
            clade2.branch_length = dm[min_i, min_j] - clade1.branch_length
            inner_clade = BaseTree.Clade(None, "Inner")
            inner_clade.clades.append(clade1)
            inner_clade.clades.append(clade2)
            clades[0] = inner_clade
            root = clades[0]

            return BaseTree.Tree(root, rooted=False)
        while len(dm) > 2:
            # calculate nodeDist
            for i in range(0, len(dm)):
                node_dist[i] = 0
                for j in range(0, len(dm)):
                    node_dist[i] += dm[i, j]
                node_dist[i] = node_dist[i] / (len(dm) - 2)

            # find minimum distance pair
            min_dist = dm[1, 0] - node_dist[1] - node_dist[0]
            min_i = 0
            min_j = 1
            for i in range(1, len(dm)):
                for j in range(0, i):
                    temp = dm[i, j] - node_dist[i] - node_dist[j]
                    if min_dist > temp:
                        min_dist = temp
                        min_i = i
                        min_j = j
            # create clade
            clade1 = clades[min_i]
            clade2 = clades[min_j]
            inner_count += 1
            inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))
            # The joined pair becomes the children of the new inner node.
            inner_clade.clades.append(clade1)
            inner_clade.clades.append(clade2)
            # assign branch length
            clade1.branch_length = (dm[min_i, min_j] + node_dist[min_i] -
                                    node_dist[min_j]) / 2.0
            clade2.branch_length = dm[min_i, min_j] - clade1.branch_length

            # update node list
            clades[min_j] = inner_clade
            del clades[min_i]

            # rebuild distance matrix,
            # set the distances of new node at the index of min_j
            for k in range(0, len(dm)):
                if k != min_i and k != min_j:
                    dm[min_j, k] = (dm[min_i, k] + dm[min_j, k] -
                                    dm[min_i, min_j]) / 2.0

            dm.names[min_j] = "Inner" + str(inner_count)
            del dm[min_i]

        # set the last clade as one of the child of the inner_clade
        root = None
        if clades[0] == inner_clade:
            clades[0].branch_length = 0
            clades[1].branch_length = dm[1, 0]
            clades[0].clades.append(clades[1])
            root = clades[0]
        else:
            clades[0].branch_length = dm[1, 0]
            clades[1].branch_length = 0
            clades[1].clades.append(clades[0])
            root = clades[1]

        return BaseTree.Tree(root, rooted=False)
Пример #28
    def nj(self, distance_matrix):
        """Construct and return a Neighbor Joining tree.

        The per-node distance term of each iteration is computed by a CUDA
        kernel; the search for the closest pair and the matrix update run
        on the host.

        :Parameters:
            distance_matrix : DistanceMatrix
                The distance matrix for tree construction. It is deep-copied,
                so the caller's matrix is left untouched.

        Returns an unrooted ``BaseTree.Tree``.

        Raises ``TypeError`` if ``distance_matrix`` is not a
        ``DistanceMatrix``.
        """
        if not isinstance(distance_matrix, DistanceMatrix):
            raise TypeError("Must provide a DistanceMatrix object.")

        # Work on a copy: the matrix shrinks by one row per joined pair.
        dm = copy.deepcopy(distance_matrix)
        # One terminal clade per taxon, in matrix order.
        clades = [BaseTree.Clade(None, name) for name in dm.names]
        inner_count = 0

        # Special cases for minimal matrices.
        if len(dm) == 1:
            return BaseTree.Tree(clades[0], rooted=False)
        if len(dm) == 2:
            # The minimum distance is always at [1, 0].
            min_i, min_j = 1, 0
            clade1 = clades[min_i]
            clade2 = clades[min_j]
            clade1.branch_length = dm[min_i, min_j] / 2.0
            clade2.branch_length = dm[min_i, min_j] - clade1.branch_length
            inner_clade = BaseTree.Clade(None, "Inner")
            # Attach both taxa, otherwise the tree would consist of the
            # bare inner node only.
            inner_clade.clades.append(clade1)
            inner_clade.clades.append(clade2)
            return BaseTree.Tree(inner_clade, rooted=False)

        # The kernel reads device_dm as a packed lower-triangular matrix
        # (row i starts at offset i*(i+1)/2). Each thread sums one row of
        # the symmetric matrix in a local accumulator, so the result does
        # not depend on the initial contents of the output buffer.
        mod = SourceModule("""
            #include <stdio.h>
            #include <stdlib.h>
            __global__ void DeviceNodeDist(float *device_dm, float *device_node_dist, float N)
            {
                const int tid = threadIdx.y + blockIdx.y * blockDim.y;
                if (tid >= N) return;
                float acc = 0.0f;
                for (int i = 0; i < N; i++) {
                    if (tid < i) {
                        acc += device_dm[(i * (i + 1)) / 2 + tid];
                    } else {
                        acc += device_dm[(tid * (tid + 1)) / 2 + i];
                    }
                }
                device_node_dist[tid] = acc / (N - 2.0);
            }
            """)
        # The lookup is loop-invariant, so resolve the kernel once.
        device_node_dist_kernel = mod.get_function("DeviceNodeDist")

        while len(dm) > 2:
            length = len(dm)

            # Flatten the matrix rows into one float32 array for the device.
            # NOTE(review): assumes dm.matrix holds lower-triangular rows
            # including the diagonal, as the kernel indexing implies.
            host_dm = np.array(
                [value for row in dm.matrix for value in row],
                dtype=np.float32)
            host_node_dist = np.empty((length,), dtype=np.float32)

            # Pick the block size from the dataset size.
            if length < 128:
                block_size = 128
            elif length < 256:
                block_size = 256
            elif length < 512:
                block_size = 512
            else:
                block_size = 1024

            # Allocate device memory and upload the matrix.
            device_dm = cuda.mem_alloc(host_dm.nbytes)
            device_node_dist = cuda.mem_alloc(host_node_dist.nbytes)
            cuda.memcpy_htod(device_dm, host_dm)

            # Launch dimensions must be integers: use ceiling division.
            block_dim = (1, block_size, 1)
            grid_dim = (1, (length + block_size - 1) // block_size, 1)
            device_node_dist_kernel(device_dm, device_node_dist,
                                    np.float32(length),
                                    block=block_dim, grid=grid_dim)

            cuda.memcpy_dtoh(host_node_dist, device_node_dist)
            node_dist = host_node_dist.tolist()

            del host_dm
            del host_node_dist

            # Find the pair (min_i, min_j) minimising
            # dm[i, j] - node_dist[i] - node_dist[j].
            min_dist = dm[1, 0] - node_dist[1] - node_dist[0]
            min_i = 0
            min_j = 1
            for i in range(1, length):
                for j in range(0, i):
                    temp = dm[i, j] - node_dist[i] - node_dist[j]
                    if min_dist > temp:
                        min_dist = temp
                        min_i = i
                        min_j = j

            # Join the two closest clades under a new inner clade.
            clade1 = clades[min_i]
            clade2 = clades[min_j]
            inner_count += 1
            inner_clade = BaseTree.Clade(None, "Inner" + str(inner_count))
            inner_clade.clades.append(clade1)
            inner_clade.clades.append(clade2)
            # Assign branch lengths.
            clade1.branch_length = (dm[min_i, min_j] + node_dist[min_i] -
                                    node_dist[min_j]) / 2.0
            clade2.branch_length = dm[min_i, min_j] - clade1.branch_length

            # Update the node list: the inner clade takes the slot of min_j.
            clades[min_j] = inner_clade
            del clades[min_i]

            # Rebuild the distance matrix, storing the distances of the
            # new node at the index of min_j.
            for k in range(0, length):
                if k != min_i and k != min_j:
                    dm[min_j, k] = (dm[min_i, k] + dm[min_j, k] -
                                    dm[min_i, min_j]) / 2.0

            dm.names[min_j] = "Inner" + str(inner_count)
            del dm[min_i]

        # Two clades remain: hang the other one under the most recently
        # created inner clade, which becomes the root.
        if clades[0] == inner_clade:
            clades[0].branch_length = 0
            clades[1].branch_length = dm[1, 0]
            clades[0].clades.append(clades[1])
            root = clades[0]
        else:
            clades[0].branch_length = dm[1, 0]
            clades[1].branch_length = 0
            clades[1].clades.append(clades[0])
            root = clades[1]

        return BaseTree.Tree(root, rooted=False)