def print_random_tree(num_nodes=5):
    """
    Build a random tree and print a tour of its basic node attributes.

    A fresh ``Tree`` is populated through ``Tree.populate(num_nodes)``; the
    root's string form, children, parent (``up``), ``name``, ``dist``,
    leaf status and tree root are printed, followed by the ``name`` of every
    node yielded when iterating over the tree.

    Parameters
    ----------
    num_nodes : int, default 5
        Size argument forwarded to ``Tree.populate``.
    """

    t = Tree()
    t.populate(num_nodes)

    print("t", t)
    print("children", t.children)
    print("get_children", t.get_children())
    print("up", t.up)
    print("name", t.name)
    print("dist", t.dist)
    print("is_leaf", t.is_leaf())
    print("get_tree_root", t.get_tree_root())

    # The topology is random, so the root (for very small trees) or its first
    # child may have no children at all; only descend where a child exists
    # instead of failing with IndexError.
    if t.children:
        first_child = t.children[0]
        print("children[0].get_tree_root", first_child.get_tree_root())
        if first_child.children:
            print("children[0].children[0].get_tree_root",
                  first_child.children[0].get_tree_root())

    for leaf in t:
        print(leaf.name)
# Example #2
# 0
# File: etetest.py  Project: Tancata/phylo
from ete3 import Tree
# Demo script: build a random tree and print basic attributes of its nodes.
# print() is called with a single parenthesised argument so the script is
# valid on Python 3 (and still prints the same thing on Python 2).
t = Tree()
# We create a random tree topology
t.populate(15)
print(t)
print(t.children)
print(t.get_children())
print(t.up)
print(t.name)
print(t.dist)
print(t.is_leaf())
print(t.get_tree_root())
print(t.children[0].get_tree_root())
print(t.children[0].children[0].get_tree_root())
# You can also iterate over tree leaves using a simple syntax
for leaf in t:
    print(leaf.name)
# Example #3
# 0
class InfoCluster:  # pylint: disable=too-many-instance-attributes
    '''Info clustering is a kind of hierarchical clustering method.
    It computes principal sequence of partition to build the hierarchical tree.

    Parameters
    ----------
    gamma : float, default=1.0
        Kernel coefficient for rbf kernels.
    affinity : string or list, default 'rbf'
        may be one of 'precomputed', 'rbf', 'laplacian', 'nearest_neighbors'.
        if list, can only be ['rbf','nearest_neighbors'] or ['laplacian', 'nearest_neighbors']
    n_neighbors : integer
        Number of neighbors to use when constructing the affinity matrix using
        the nearest neighbors method. Ignored for ``affinity='rbf'``.
    '''
    def __init__(self, gamma=1, affinity='rbf', n_neighbors=10):
        self._gamma = gamma
        self.affinity = affinity
        self.n_neighbors = n_neighbors
        self.tree = Tree()
        self.tree_depth = 0  # cached by get_tree_depth; 0 means "not computed"
        self.g = None  # PsPartition solver, created by _init_g
        self.critical_values = []
        self.partition_list = []
        self.num_points = 0

    def fit(self, X, initialize_tree=True):  # pylint: disable=too-many-arguments
        '''Construct an affinity graph from X (according to ``self.affinity``),
        then applies info clustering to this affinity graph.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            if affinity='precomputed', X is networkx like object or affinity matrix(upper triangle)
        initialize_tree : bool, default True
            if True, build the hierarchical tree right after the partitions
            are computed; otherwise it is built lazily on first use.
        '''
        self.tree = Tree()  # clear the tree
        # The depth is cached by get_tree_depth; drop it together with the
        # tree, otherwise a refit would keep reporting the previous depth.
        self.tree_depth = 0
        self._init_g(X)
        self.g.run()
        self.critical_values = self.g.get_critical_values()
        self.partition_list = self.g.get_partitions()
        # the last partition is used to determine the number of points
        self.num_points = len(self.partition_list[-1])
        if initialize_tree:
            self._get_hierachical_tree()

    def fit_predict(self, X):
        '''Fit the model to X.

        NOTE(review): despite its name this method only calls ``fit`` and
        returns None; use ``get_category`` to obtain labels.
        '''
        self.fit(X)

    def _add_node(self, root, node_list, num_index):
        '''Recursively attach to ``root`` the sub-clusters into which
        ``partition_list[num_index]`` splits the points of ``node_list``.
        '''
        root.add_features(cv=self.critical_values[num_index - 1])
        label_list = self._partition_to_category(
            self.partition_list[num_index])
        # Group the points by label; dict insertion order keeps the groups
        # in order of first appearance within node_list.
        groups = {}
        for i in node_list:
            groups.setdefault(label_list[i], []).append(i)
        for node_list_i in groups.values():
            node_name = ''.join([str(ii) for ii in node_list_i])
            if node_name != root.name:
                root_i = root.add_child(name=node_name)
            else:
                # this partition does not split the node: refine it in place
                root_i = root
            if len(node_list_i) > 1:
                self._add_node(root_i, node_list_i, num_index + 1)

    def _partition_to_category(self, partition):
        '''Convert a partition (iterable of iterables of point indices) to a
        float array of length ``num_points`` holding each point's cluster index.
        '''
        cat = np.zeros(self.num_points)
        for label_index, members in enumerate(partition):
            for j in members:
                cat[j] = label_index
        return cat

    def _get_hierachical_tree(self):
        '''Populate ``self.tree`` starting from all points at the root.'''
        node_list = list(range(self.num_points))
        self._add_node(self.tree, node_list, 1)

    def _set_tree_depth(self, node, depth):
        '''Raise ``self.tree_depth`` to the depth of the deepest leaf below ``node``.'''
        if node.is_leaf():
            if depth > self.tree_depth:
                self.tree_depth = depth
            return
        for node_i in node.children:  # depth first search
            self._set_tree_depth(node_i, depth + 1)

    def get_tree_depth(self):
        '''get clustering tree depth'''
        if self.tree.is_leaf():
            self._get_hierachical_tree()
        if self.tree_depth != 0:
            return self.tree_depth
        self._set_tree_depth(self.tree, 0)
        return self.tree_depth

    def print_hierarchical_tree(self):
        '''print the hierarchical tree of clustering result
        '''
        if self.tree.is_leaf():
            self._get_hierachical_tree()
        print(self.tree)

    def get_category(self, min_num):
        '''get the clustering labels with the number of clusters no smaller than min_num

        Parameters
        ----------
        min_num : int, minimal number of cluster

        Returns
        --------
        numpy array of floats, with each element denoting the label of the
        cluster the corresponding point belongs to.

        Raises
        ------
        ValueError
            if no partition has at least ``min_num`` clusters.
        '''
        partition = self.get_partition(min_num)
        return self._partition_to_category(partition)

    def get_partition(self, min_num):
        '''
        return the first partition in ``partition_list`` whose number of
        clusters is no smaller than min_num.

        Raises
        ------
        ValueError
            if no such partition exists.
        '''
        for i in self.partition_list:
            if len(i) >= min_num:
                return i
        raise ValueError('cluster with min num %d not found' % min_num)

    def _init_g(self, X):  # pylint: disable=too-many-branches
        '''Build the weighted edge list for X and create ``self.g``.

        Raises
        ------
        TypeError
            if X is not a list, numpy.ndarray, networkx.Graph or networkx.DiGraph.
        ValueError
            if ``affinity`` is a list that lacks 'nearest_neighbors' or a kernel name.
        NameError
            if ``affinity`` is an unknown string.
        '''
        is_nx_graph = False
        if isinstance(X, list):
            n_samples = len(X)
        elif isinstance(X, np.ndarray):
            n_samples = X.shape[0]
        elif isinstance(X, (nx.Graph, nx.DiGraph)):
            n_samples = nx.number_of_nodes(X)
            is_nx_graph = True
        else:
            raise TypeError('type(X) must be list, numpy.ndarray, '
                            'networkx.Graph or networkx.DiGraph')
        sim_list = []
        if not is_nx_graph:
            if self.affinity == 'precomputed':
                # X may be a nested list; the [i, j] indexing below needs an array
                affinity_matrix = np.asarray(X)
            elif self.affinity == 'nearest_neighbors':
                connectivity = kneighbors_graph(X,
                                                n_neighbors=self.n_neighbors,
                                                include_self=True)
                affinity_matrix = connectivity.todense()
            elif self.affinity == 'laplacian':
                affinity_matrix = pairwise_kernels(X,
                                                   metric='laplacian',
                                                   gamma=self._gamma)
            elif self.affinity == 'rbf':
                affinity_matrix = pairwise_kernels(X,
                                                   metric='rbf',
                                                   gamma=self._gamma)
            elif not isinstance(self.affinity, str):
                # list form: a kernel masked by the nearest-neighbors graph
                if self.affinity.count('nearest_neighbors') == 0:
                    raise ValueError(
                        "affinity list should specify nearest_neighbors")
                connectivity = kneighbors_graph(X,
                                                n_neighbors=self.n_neighbors,
                                                include_self=True)
                if self.affinity.count('laplacian') > 0:
                    affinity_matrix = pairwise_kernels(X,
                                                       metric='laplacian',
                                                       gamma=self._gamma)
                elif self.affinity.count('rbf') > 0:
                    affinity_matrix = pairwise_kernels(X,
                                                       metric='rbf',
                                                       gamma=self._gamma)
                else:
                    raise ValueError(
                        "affinity list should specify laplacian or rbf")
                affinity_matrix = np.multiply(affinity_matrix,
                                              connectivity.todense())
            else:
                raise NameError("Unknown affinity name %s" % self.affinity)
            # only the upper triangle (s_i < s_j) is read
            for s_i in range(n_samples):
                for s_j in range(s_i + 1, n_samples):
                    sim_list.append((s_i, s_j, affinity_matrix[s_i, s_j]))
        else:
            for s_i, s_j, weight_dic in X.edges(data=True):
                s_ii = int(s_i)
                s_jj = int(s_j)
                # edges whose endpoints are not in increasing order are skipped
                if s_ii < s_jj:
                    sim_list.append((s_ii, s_jj, weight_dic['weight']))

        self.g = PsPartition(n_samples, sim_list)
# Example #4
# 0
from ete3 import Tree
# Demo script: build a random tree and print basic attributes of its nodes.
# print() is called with a single parenthesised argument so the script is
# valid on Python 3 (and still prints the same thing on Python 2).
t = Tree()
# We create a random tree topology
t.populate(15)
print(t)
print(t.children)
print(t.get_children())
print(t.up)
print(t.name)
print(t.dist)
print(t.is_leaf())
print(t.get_tree_root())
print(t.children[0].get_tree_root())
print(t.children[0].children[0].get_tree_root())
# You can also iterate over tree leaves using a simple syntax
for leaf in t:
    print(leaf.name)
# Example #5
# 0
class GN:
    '''Hierarchical graph clustering built on ``gn_inner_routine``.

    After ``fit`` the instance holds the sequence of partitions
    (``partition_list``), the number of clusters in each of them
    (``partition_num_list``) and, optionally, the hierarchical ``tree``.
    '''
    def __init__(self):
        self.reinit()

    def reinit(self):
        '''Reset the partitions, the tree and the cached tree depth.'''
        self.partition_num_list = []
        self.partition_list = []
        self.tree = Tree()
        self.tree_depth = 0  # cached by get_tree_depth; 0 means "not computed"

    def fit(self, G_outer, initialize_tree=True):
        '''
            G_outer: nx.Graph like object (a copy is stored, the argument
                itself is passed to nothing else)
            initialize_tree: if True, build the hierarchical tree immediately
            returns self
        '''
        self.reinit()
        self.G = G_outer.copy()

        labels_list = gn_inner_routine(self.G)
        self.partition_list = label_list_to_partition_list(labels_list)
        self.partition_num_list = [len(i) for i in self.partition_list]
        if initialize_tree:
            self._get_hierarchical_tree()
        return self

    def get_category(self, i):
        '''Return a float array with the cluster index of every node, taken
        from the first partition having at least ``i`` clusters.

        If no partition has that many clusters, the first partition is used.
        '''
        index = next(
            (ind for ind, val in enumerate(self.partition_num_list)
             if val >= i),
            0)
        cat = np.zeros(len(self.G.nodes))
        for t, members in enumerate(self.partition_list[index]):
            for r in members:
                cat[r] = t
        return cat

    def _add_node(self, root, node_list, num_index):
        '''Recursively attach to ``root`` the sub-clusters into which the
        partition at ``num_index`` splits the nodes of ``node_list``.
        '''
        label_list = self.get_category(self.partition_num_list[num_index])
        # Group the nodes by label; dict insertion order keeps the groups
        # in order of first appearance within node_list.
        groups = {}
        for i in node_list:
            groups.setdefault(label_list[i], []).append(i)
        for node_list_i in groups.values():
            node_name = ''.join([str(ii) for ii in node_list_i])
            if node_name != root.name:
                root_i = root.add_child(name=node_name)
            else:
                # this partition does not split the node: refine it in place
                root_i = root
            if len(node_list_i) > 1:
                self._add_node(root_i, node_list_i, num_index + 1)

    def _get_hierarchical_tree(self):
        '''Populate ``self.tree`` starting from all nodes at the root.'''
        # presumably the last partition is all singletons, so its size is
        # the number of nodes -- verify against gn_inner_routine
        max_num = self.partition_num_list[-1]
        node_list = list(range(max_num))
        self._add_node(self.tree, node_list, 1)

    def _set_tree_depth(self, node, depth):
        '''Raise ``self.tree_depth`` to the depth of the deepest leaf below ``node``.'''
        if node.is_leaf():
            if depth > self.tree_depth:
                self.tree_depth = depth
            return
        for node_i in node.children:  # depth first search
            self._set_tree_depth(node_i, depth + 1)

    def get_tree_depth(self):
        '''Return the depth of the clustering tree, building the tree first
        if it is still empty. The result is cached in ``self.tree_depth``.
        '''
        if self.tree.is_leaf():
            self._get_hierarchical_tree()
        if self.tree_depth != 0:
            return self.tree_depth
        self._set_tree_depth(self.tree, 0)
        return self.tree_depth
# Example #6
# 0
class GN_OLD:
    '''Girvan-Newman hierarchical graph clustering driven by the ``cmty`` helpers.

    After ``fit`` the instance holds the sequence of partitions
    (``partition_list``), the number of clusters in each of them
    (``partition_num_list``) and, optionally, the hierarchical ``tree``.
    '''
    def __init__(self):
        self.reinit()

    def reinit(self):
        '''Reset the partitions, the tree and the cached tree depth.'''
        self.partition_num_list = []
        self.partition_list = []
        self.tree = Tree()
        self.tree_depth = 0  # cached by get_tree_depth; 0 means "not computed"

    def fit(self, G_outer, initialize_tree=True):
        '''
            G_outer: nx.Graph like object (a copy is stored and modified,
                the argument itself is left untouched)
            initialize_tree: if True, build the hierarchical tree immediately
            returns self
        '''
        self.reinit()
        self.G = G_outer.copy()
        A = nx.adjacency_matrix(self.G)  # adjacency matrix

        # the weighted version for number of edges: every edge appears
        # twice in the adjacency matrix, hence the division by two
        self.m_ = A.sum() / 2.0

        # calculate the weighted degree for each node
        self.Orig_deg = cmty.UpdateDeg(A, self.G.nodes())

        # run Newman alg
        self.runGirvanNewman()
        if initialize_tree:
            self._get_hierarchical_tree()
        return self

    def runGirvanNewman(self):
        '''Apply ``cmty.CmtyGirvanNewmanStep`` to ``self.G`` until no edge is
        left, recording the connected components after every step.

        The partition with the highest modularity is stored in
        ``self.Bestcomps``, but only if that modularity is positive.
        '''
        # let's find the best split of the graph
        BestQ = 0.0
        self.partition_num_list.append(1)
        nvertices = len(self.G.nodes)
        # initial partition: one cluster with every vertex
        # (assumes nodes are labelled 0..n-1 -- TODO confirm)
        self.partition_list.append([set(range(nvertices))])
        while True:
            cmty.CmtyGirvanNewmanStep(self.G)
            partition = list(nx.connected_components(self.G))
            self.partition_num_list.append(len(partition))
            self.partition_list.append(partition)
            Q = cmty._GirvanNewmanGetModularity(self.G, self.Orig_deg, self.m_)
            if Q > BestQ:
                BestQ = Q
                Bestcomps = partition  # Best Split
            if self.G.number_of_edges() == 0:
                break
        if BestQ > 0.0:
            self.Bestcomps = Bestcomps

    def get_category(self, i):
        '''Return a float array with the cluster index of every node, taken
        from the first partition having at least ``i`` clusters.

        If no partition has that many clusters, the first partition is used.
        '''
        index = next(
            (ind for ind, val in enumerate(self.partition_num_list)
             if val >= i),
            0)
        cat = np.zeros(len(self.Orig_deg))
        for t, members in enumerate(self.partition_list[index]):
            for r in members:
                cat[r] = t
        return cat

    def _add_node(self, root, node_list, num_index):
        '''Recursively attach to ``root`` the sub-clusters into which the
        partition at ``num_index`` splits the nodes of ``node_list``.
        '''
        label_list = self.get_category(self.partition_num_list[num_index])
        # Group the nodes by label; dict insertion order keeps the groups
        # in order of first appearance within node_list.
        groups = {}
        for i in node_list:
            groups.setdefault(label_list[i], []).append(i)
        for node_list_i in groups.values():
            node_name = ''.join([str(ii) for ii in node_list_i])
            if node_name != root.name:
                root_i = root.add_child(name=node_name)
            else:
                # this partition does not split the node: refine it in place
                root_i = root
            if len(node_list_i) > 1:
                self._add_node(root_i, node_list_i, num_index + 1)

    def _get_hierarchical_tree(self):
        '''Populate ``self.tree`` starting from all nodes at the root.'''
        # the loop in runGirvanNewman stops once no edge is left, so the last
        # partition's size is used as the number of nodes
        max_num = self.partition_num_list[-1]
        node_list = list(range(max_num))
        self._add_node(self.tree, node_list, 1)

    def _set_tree_depth(self, node, depth):
        '''Raise ``self.tree_depth`` to the depth of the deepest leaf below ``node``.'''
        if node.is_leaf():
            if depth > self.tree_depth:
                self.tree_depth = depth
            return
        for node_i in node.children:  # depth first search
            self._set_tree_depth(node_i, depth + 1)

    def get_tree_depth(self):
        '''Return the depth of the clustering tree, building the tree first
        if it is still empty. The result is cached in ``self.tree_depth``.
        '''
        if self.tree.is_leaf():
            self._get_hierarchical_tree()
        if self.tree_depth != 0:
            return self.tree_depth
        self._set_tree_depth(self.tree, 0)
        return self.tree_depth