def print_random_tree(num_nodes=5):
    """Build a random tree and print a tour of its root node's basic API.

    Parameters
    ----------
    num_nodes : int, default=5
        Size argument forwarded to ``Tree.populate``.

    Notes
    -----
    The child and grandchild lines are only printed when those nodes exist:
    the topology is random, so the first child of the root may itself be a
    leaf (and a very small tree may have no children at all).
    """
    t = Tree()
    t.populate(num_nodes)
    print("t", t)
    print("children", t.children)
    print("get_children", t.get_children())
    print("up", t.up)
    print("name", t.name)
    print("dist", t.dist)
    print("is_leaf", t.is_leaf())
    print("get_tree_root", t.get_tree_root())
    if t.children:
        first_child = t.children[0]
        print("children[0].get_tree_root", first_child.get_tree_root())
        # Guard the grandchild access: indexing an empty children list
        # would raise IndexError.
        if first_child.children:
            print("children[0].children[0].get_tree_root",
                  first_child.children[0].get_tree_root())
    # Iterating over a tree yields its leaves.
    for leaf in t:
        print(leaf.name)
from ete3 import Tree

# Demo script: build a random tree and print some basic node attributes.
# print() is called with a single argument so the output is the same on
# Python 2 and Python 3.
t = Tree()
# We create a random tree topology
t.populate(15)
print(t)
print(t.children)
print(t.get_children())
print(t.up)
print(t.name)
print(t.dist)
print(t.is_leaf())
print(t.get_tree_root())
print(t.children[0].get_tree_root())
# The topology is random, so the first child may itself be a leaf; only
# descend one more level when a grandchild actually exists.
if t.children[0].children:
    print(t.children[0].children[0].get_tree_root())
# You can also iterate over tree leaves using a simple syntax
for leaf in t:
    print(leaf.name)
class InfoCluster:  # pylint: disable=too-many-instance-attributes
    '''Info clustering is a kind of hierarchical clustering method.
    It computes principal sequence of partition to build the hierarchical tree.

    Parameters
    ----------
    gamma : float, default=1.0
        Kernel coefficient passed to the rbf / laplacian kernels.

    affinity : string or list, default 'rbf'
        may be one of 'precomputed', 'rbf', 'laplacian', 'nearest_neighbors'.
        if list, can only be ['rbf','nearest_neighbors'] or
        ['laplacian', 'nearest_neighbors']

    n_neighbors : integer
        Number of neighbors to use when constructing the affinity matrix using
        the nearest neighbors method. Ignored for ``affinity='rbf'``.

    Attributes
    ----------
    tree : Tree
        Hierarchical tree built from the partition sequence.
    critical_values : list
        Critical values reported by the underlying ``PsPartition`` run.
    partition_list : list
        Partitions reported by the underlying ``PsPartition`` run.
    num_points : int
        Number of groups in the last partition of ``partition_list``.
    '''

    def __init__(self, gamma=1, affinity='rbf', n_neighbors=10):
        self._gamma = gamma
        self.affinity = affinity
        self.n_neighbors = n_neighbors
        self.tree = Tree()
        self.tree_depth = 0  # 0 means "not computed yet"
        self.g = None
        self.critical_values = []
        self.partition_list = []
        self.num_points = 0

    def fit(self, X, initialize_tree=True):  # pylint: disable=too-many-arguments
        '''Construct an affinity graph from X, then apply info clustering
        to this affinity graph.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            if affinity='precomputed', X is networkx like object or
            affinity matrix(upper triangle)
        initialize_tree : bool, default=True
            Build the hierarchical tree immediately after clustering.
        '''
        self.tree = Tree()  # clear the tree
        # The depth is cached by get_tree_depth(); drop the value that
        # belongs to a previous fit so it is recomputed for the new tree.
        self.tree_depth = 0
        self._init_g(X)
        self.g.run()
        self.critical_values = self.g.get_critical_values()
        self.partition_list = self.g.get_partitions()
        self.num_points = len(self.partition_list[-1])
        if initialize_tree:
            self._get_hierachical_tree()

    def fit_predict(self, X):
        '''Fit the model to X.

        NOTE(review): unlike what the name suggests, no labels are returned
        (the method returns None); use ``get_category`` after fitting.
        '''
        self.fit(X)

    def _add_node(self, root, node_list, num_index):
        '''Recursively grow the tree below ``root``.

        ``node_list`` holds the point indices covered by ``root``; they are
        split according to ``partition_list[num_index]`` and every resulting
        group with more than one point is refined with the next partition.
        '''
        root.add_features(cv=self.critical_values[num_index - 1])
        label_list = self._partition_to_category(
            self.partition_list[num_index])
        # Group the points by label, keeping groups in first-appearance order.
        groups = {}
        for i in node_list:
            groups.setdefault(label_list[i], []).append(i)
        for node_list_i in groups.values():
            # NOTE(review): indices are concatenated without a separator, so
            # e.g. [1, 2] and [12] produce the same name.
            node_name = ''.join([str(ii) for ii in node_list_i])
            if node_name != root.name:
                root_i = root.add_child(name=node_name)
            else:
                # Same name as the parent: keep refining the parent itself
                # instead of adding a child.
                root_i = root
            if len(node_list_i) > 1:
                self._add_node(root_i, node_list_i, num_index + 1)

    def _partition_to_category(self, partition):
        '''Convert a partition (iterable of index groups) into a label array
        of length ``num_points``; group k gets label k.'''
        cat = np.zeros(self.num_points)
        for label_index, group in enumerate(partition):
            for j in group:
                cat[j] = label_index
        return cat

    def _get_hierachical_tree(self):
        '''Build ``self.tree`` from the stored partitions.'''
        node_list = list(range(self.num_points))
        self._add_node(self.tree, node_list, 1)

    def _set_tree_depth(self, node, depth):
        '''Depth first search that records the largest leaf depth in
        ``self.tree_depth``.'''
        if node.is_leaf():
            if depth > self.tree_depth:
                self.tree_depth = depth
            return
        for node_i in node.children:  # depth first search
            self._set_tree_depth(node_i, depth + 1)

    def get_tree_depth(self):
        '''get clustering tree depth'''
        if self.tree.is_leaf():
            # The tree has no children yet, so build it first.
            self._get_hierachical_tree()
        if self.tree_depth != 0:
            return self.tree_depth  # cached by an earlier call
        self._set_tree_depth(self.tree, 0)
        return self.tree_depth

    def print_hierarchical_tree(self):
        '''print the hierarchical tree of clustering result'''
        if self.tree.is_leaf():
            self._get_hierachical_tree()
        print(self.tree)

    def get_category(self, min_num):
        '''get the clustering labels with the number of clusters no smaller
        than min_num

        Parameters
        ----------
        min_num : int, minimal number of cluster

        Returns
        --------
        numpy array, with each element denoting the label of the cluster
        of the corresponding point.

        Raises
        ------
        ValueError
            if no stored partition has at least ``min_num`` groups.
        '''
        partition = self.get_partition(min_num)
        return self._partition_to_category(partition)

    def get_partition(self, min_num):
        '''return the first stored partition whose number of groups is
        no smaller than min_num

        Raises
        ------
        ValueError
            if no such partition exists.
        '''
        for i in self.partition_list:
            if len(i) >= min_num:
                return i
        raise ValueError('cluster with min num %d not found' % min_num)

    def _build_affinity_matrix(self, X):  # pylint: disable=too-many-branches
        '''Return the dense affinity matrix for array-like ``X`` according
        to ``self.affinity``.

        Raises
        ------
        NameError
            if ``affinity`` is an unknown string.
        ValueError
            if ``affinity`` is a list that lacks 'nearest_neighbors' or
            lacks both 'laplacian' and 'rbf'.
        '''
        if self.affinity == 'precomputed':
            # A plain (nested) list cannot be indexed as [i, j]; coerce it.
            return np.asarray(X)
        if self.affinity == 'nearest_neighbors':
            connectivity = kneighbors_graph(X, n_neighbors=self.n_neighbors,
                                            include_self=True)
            return connectivity.todense()
        if self.affinity in ('laplacian', 'rbf'):
            return pairwise_kernels(X, metric=self.affinity,
                                    gamma=self._gamma)
        if isinstance(self.affinity, str):
            raise NameError("Unknown affinity name %s" % self.affinity)
        # Non-string affinity: a kernel masked by the k-nearest-neighbor graph.
        if 'nearest_neighbors' not in self.affinity:
            raise ValueError(
                "affinity list should specify nearest_neighbors")
        connectivity = kneighbors_graph(X, n_neighbors=self.n_neighbors,
                                        include_self=True)
        if 'laplacian' in self.affinity:
            kernel = 'laplacian'
        elif 'rbf' in self.affinity:
            kernel = 'rbf'
        else:
            raise ValueError(
                "affinity list should specify laplacian or rbf")
        affinity_matrix = pairwise_kernels(X, metric=kernel,
                                           gamma=self._gamma)
        return np.multiply(affinity_matrix, connectivity.todense())

    def _init_g(self, X):
        '''Build the weighted edge list from ``X`` and create ``self.g``.

        Raises
        ------
        TypeError
            if X is not a list, numpy.ndarray, networkx.Graph or
            networkx.DiGraph.
        '''
        is_nx_graph = False
        if isinstance(X, list):
            n_samples = len(X)
        elif isinstance(X, np.ndarray):
            n_samples = X.shape[0]
        elif isinstance(X, (nx.Graph, nx.DiGraph)):
            n_samples = nx.number_of_nodes(X)
            is_nx_graph = True
        else:
            raise TypeError('type(X) must be list, numpy.ndarray, '
                            'networkx.Graph or networkx.DiGraph')
        sim_list = []
        if not is_nx_graph:
            affinity_matrix = self._build_affinity_matrix(X)
            # Only the strict upper triangle is used.
            for s_i in range(n_samples):
                for s_j in range(s_i + 1, n_samples):
                    sim_list.append((s_i, s_j, affinity_matrix[s_i, s_j]))
        else:
            undirected = not X.is_directed()
            for s_i, s_j, weight_dic in X.edges(data=True):
                s_ii = int(s_i)
                s_jj = int(s_j)
                # An undirected edge may be reported as (larger, smaller);
                # normalise it instead of silently dropping the edge.
                if undirected and s_ii > s_jj:
                    s_ii, s_jj = s_jj, s_ii
                if s_ii < s_jj:
                    sim_list.append((s_ii, s_jj, weight_dic['weight']))
        self.g = PsPartition(n_samples, sim_list)
class GN:
    '''Hierarchical graph partitioning built on ``gn_inner_routine``.

    After ``fit`` the instance holds a sequence of partitions of the graph
    nodes (``partition_list``), the number of groups in each of them
    (``partition_num_list``) and, optionally, the corresponding tree.
    '''

    def __init__(self):
        self.reinit()

    def reinit(self):
        '''Reset all results of a previous fit.'''
        self.partition_num_list = []
        self.partition_list = []
        self.tree = Tree()
        self.tree_depth = 0  # 0 means "not computed yet"

    def fit(self, G_outer, initialize_tree=True):
        '''Partition a copy of the graph.

        Parameters
        ----------
        G_outer : nx.Graph like object
        initialize_tree : bool, default=True
            Build the hierarchical tree immediately after partitioning.

        Returns
        -------
        self
        '''
        self.reinit()
        self.G = G_outer.copy()
        labels_list = gn_inner_routine(self.G)
        self.partition_list = label_list_to_partition_list(labels_list)
        self.partition_num_list = [len(i) for i in self.partition_list]
        if initialize_tree:
            self._get_hierarchical_tree()
        return self

    def get_category(self, i):
        '''Return the label array of the first partition that has at least
        ``i`` groups.

        If no stored partition is that fine, the partition at index 0 is
        used. Node identifiers are used directly as array indices.
        '''
        index = 0
        for ind, val in enumerate(self.partition_num_list):
            if val >= i:
                index = ind
                break
        cat = np.zeros(len(self.G.nodes))
        for t, group in enumerate(self.partition_list[index]):
            for r in group:
                cat[r] = t
        return cat

    def _add_node(self, root, node_list, num_index):
        '''Recursively grow the tree below ``root``.

        ``node_list`` holds the node indices covered by ``root``; they are
        split by the labels of the partition selected through
        ``partition_num_list[num_index]`` and every resulting group with more
        than one node is refined with the next entry.
        '''
        label_list = self.get_category(self.partition_num_list[num_index])
        # Group the nodes by label, keeping groups in first-appearance order.
        groups = {}
        for i in node_list:
            groups.setdefault(label_list[i], []).append(i)
        for node_list_i in groups.values():
            node_name = ''.join([str(ii) for ii in node_list_i])
            if node_name != root.name:
                root_i = root.add_child(name=node_name)
            else:
                # Same name as the parent: keep refining the parent itself.
                root_i = root
            if len(node_list_i) > 1:
                self._add_node(root_i, node_list_i, num_index + 1)

    def _get_hierarchical_tree(self):
        '''Build ``self.tree`` from the stored partitions.'''
        # The group count of the last partition is used as the number of
        # nodes -- presumably the last partition is all singletons;
        # TODO confirm against gn_inner_routine.
        max_num = self.partition_num_list[-1]
        node_list = list(range(max_num))
        self._add_node(self.tree, node_list, 1)

    def _set_tree_depth(self, node, depth):
        '''Depth first search that records the largest leaf depth in
        ``self.tree_depth``.'''
        if node.is_leaf():
            if depth > self.tree_depth:
                self.tree_depth = depth
            return
        for node_i in node.children:  # depth first search
            self._set_tree_depth(node_i, depth + 1)

    def get_tree_depth(self):
        '''Return the depth of the hierarchical tree, building the tree
        first if it has no children yet.'''
        if self.tree.is_leaf():
            self._get_hierarchical_tree()
        if self.tree_depth != 0:
            return self.tree_depth  # cached by an earlier call
        self._set_tree_depth(self.tree, 0)
        return self.tree_depth
class GN_OLD:
    '''Older hierarchical graph partitioning that repeatedly applies
    ``cmty.CmtyGirvanNewmanStep`` until the graph has no edges left.

    After ``fit`` the instance holds every intermediate partition
    (``partition_list``), the number of groups in each of them
    (``partition_num_list``) and, optionally, the corresponding tree.
    '''

    def __init__(self):
        self.reinit()

    def reinit(self):
        '''Reset all results of a previous fit.'''
        self.partition_num_list = []
        self.partition_list = []
        self.tree = Tree()
        self.tree_depth = 0  # 0 means "not computed yet"

    def fit(self, G_outer, initialize_tree=True):
        '''Partition a copy of the graph.

        Parameters
        ----------
        G_outer : nx.Graph like object
        initialize_tree : bool, default=True
            Build the hierarchical tree immediately after partitioning.

        Returns
        -------
        self
        '''
        self.reinit()
        self.G = G_outer.copy()  # this copy is modified while partitioning
        G = G_outer.copy()  # untouched copy used for the initial statistics
        # adjacency_matrix is the long-standing name; the adj_matrix alias
        # is not available in recent networkx releases.
        A = nx.adjacency_matrix(G)
        # Weighted number of edges: half of the sum of all matrix entries.
        self.m_ = float(A.sum()) / 2.0
        # calculate the weighted degree for each node
        self.Orig_deg = cmty.UpdateDeg(A, G.nodes())
        # run Newman alg
        self.runGirvanNewman()
        if initialize_tree:
            self._get_hierarchical_tree()
        return self

    def runGirvanNewman(self):
        '''Record the partition after every step on ``self.G`` and remember
        the one with the highest modularity.

        ``self.Bestcomps`` is only set when some partition reaches a
        modularity greater than 0.
        '''
        # let's find the best split of the graph
        BestQ = 0.0
        # The first recorded partition is the single group of all nodes.
        self.partition_num_list.append(1)
        nvertices = len(self.G.nodes)
        self.partition_list.append([set(range(nvertices))])
        while True:
            cmty.CmtyGirvanNewmanStep(self.G)
            partition = list(nx.connected_components(self.G))
            self.partition_num_list.append(len(partition))
            self.partition_list.append(partition)
            Q = cmty._GirvanNewmanGetModularity(self.G, self.Orig_deg,
                                                self.m_)
            if Q > BestQ:
                BestQ = Q
                Bestcomps = partition  # Best Split
            if self.G.number_of_edges() == 0:
                break
        if BestQ > 0.0:
            self.Bestcomps = Bestcomps

    def get_category(self, i):
        '''Return the label array of the first partition that has at least
        ``i`` groups.

        If no stored partition is that fine, the partition at index 0 is
        used. Node identifiers are used directly as array indices.
        '''
        index = 0
        for ind, val in enumerate(self.partition_num_list):
            if val >= i:
                index = ind
                break
        cat = np.zeros(len(self.Orig_deg))
        for t, group in enumerate(self.partition_list[index]):
            for r in group:
                cat[r] = t
        return cat

    def _add_node(self, root, node_list, num_index):
        '''Recursively grow the tree below ``root``.

        ``node_list`` holds the node indices covered by ``root``; they are
        split by the labels of the partition selected through
        ``partition_num_list[num_index]`` and every resulting group with more
        than one node is refined with the next entry.
        '''
        label_list = self.get_category(self.partition_num_list[num_index])
        # Group the nodes by label, keeping groups in first-appearance order.
        groups = {}
        for i in node_list:
            groups.setdefault(label_list[i], []).append(i)
        for node_list_i in groups.values():
            node_name = ''.join([str(ii) for ii in node_list_i])
            if node_name != root.name:
                root_i = root.add_child(name=node_name)
            else:
                # Same name as the parent: keep refining the parent itself.
                root_i = root
            if len(node_list_i) > 1:
                self._add_node(root_i, node_list_i, num_index + 1)

    def _get_hierarchical_tree(self):
        '''Build ``self.tree`` from the stored partitions.'''
        # The last partition is recorded once the graph has no edges, so its
        # group count is used as the number of nodes.
        max_num = self.partition_num_list[-1]
        node_list = list(range(max_num))
        self._add_node(self.tree, node_list, 1)

    def _set_tree_depth(self, node, depth):
        '''Depth first search that records the largest leaf depth in
        ``self.tree_depth``.'''
        if node.is_leaf():
            if depth > self.tree_depth:
                self.tree_depth = depth
            return
        for node_i in node.children:  # depth first search
            self._set_tree_depth(node_i, depth + 1)

    def get_tree_depth(self):
        '''Return the depth of the hierarchical tree, building the tree
        first if it has no children yet.'''
        if self.tree.is_leaf():
            self._get_hierarchical_tree()
        if self.tree_depth != 0:
            return self.tree_depth  # cached by an earlier call
        self._set_tree_depth(self.tree, 0)
        return self.tree_depth