def createSampleData(self):
    """Populate ``self.X`` with a distance matrix for a fixed sample tree.

    The tree is built from the hard-coded Newick string
    ``(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);``. Row/column ``i`` of the matrix
    belongs to the i-th label of ``A, B, C, D``. Every off-diagonal entry
    holds ``tree.get_distance`` for that pair of leaves; the diagonal stays
    zero. The result is stored on the instance and nothing is returned.
    """
    # hardcoded data for now
    tree = ClusterTree('(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);')
    leaves = tree.get_leaf_names()

    # Fixed label -> row/column position, independent of the order in which
    # the tree happens to report its leaves.
    labels = ('A', 'B', 'C', 'D')
    idx_dict = {label: pos for pos, label in enumerate(labels)}

    # Size the matrix from the label set instead of repeating a literal 4.
    dmat = np.zeros((len(labels), len(labels)))
    for l1, l2 in combinations(leaves, 2):
        d = tree.get_distance(l1, l2)
        row, col = idx_dict[l1], idx_dict[l2]
        # Write both triangles so the matrix is symmetric.
        dmat[row, col] = dmat[col, row] = d
    self.X = dmat
def newick(self, user_input_file):
    """Build a leaf distance matrix and a linkage matrix from a tree.

    Args:
        user_input_file: Passed unchanged to ``ClusterTree``. Presumably a
            Newick file path or Newick string -- confirm against callers.

    Returns:
        tuple: ``(dmat, schlink)``. ``dmat`` is a symmetric square array
        with one row/column per leaf, in ``tree.get_leaf_names()`` order,
        holding ``tree.get_distance`` for each pair (zero diagonal).
        ``schlink`` is what ``sch.linkage`` returns for the condensed form
        of ``dmat`` with ``method='average'``.

    Raises:
        ValueError: If two leaves share a name, because a name can only map
            to a single row/column of the matrix.
    """
    tree = ClusterTree(user_input_file)
    leaves = tree.get_leaf_names()
    n_leaves = len(leaves)

    # Leaf name -> row/column position in the distance matrix.
    leaf_dict = {name: pos for pos, name in enumerate(leaves)}
    if len(leaf_dict) != n_leaves:
        raise ValueError(
            'Leaf names must be unique to build a distance matrix.'
        )

    # Create a numpy array of zeros based on the number of taxa in the tree
    dmat = np.zeros((n_leaves, n_leaves))

    print('Converting input tree:')
    # Compute distance matrix from newick tree (this is not yet a linked
    # distance matrix). combinations() has no len(), so tell tqdm how many
    # pairs to expect; otherwise the bar cannot show progress towards a total.
    n_pairs = n_leaves * (n_leaves - 1) // 2
    for l1, l2 in tqdm(combinations(leaves, 2), total=n_pairs):
        d = tree.get_distance(l1, l2)
        row, col = leaf_dict[l1], leaf_dict[l2]
        # Write both triangles so the matrix is symmetric.
        dmat[row, col] = dmat[col, row] = d

    # Convert dmat into a linkage distance matrix for scipy. linkage() is fed
    # the condensed (1-D) form produced by squareform(); per the SciPy docs
    # ``metric`` only matters for raw observation vectors, it is kept here
    # so the call stays as it was.
    condensed = scipy.spatial.distance.squareform(dmat)
    schlink = sch.linkage(condensed, method='average', metric='euclidean')
    return dmat, schlink
def newick_to_linkage(filePath):
    """Convert a Newick tree to a SciPy linkage matrix.

    Args:
        filePath: Passed unchanged to ``ClusterTree``. Presumably the path
            of a Newick file -- confirm against callers.

    Returns:
        The result of ``sch.linkage`` (``method='average'``) applied to the
        condensed pairwise leaf distance matrix of the tree.

    Raises:
        ValueError: If two leaves share a name, because a name can only map
            to a single row/column of the distance matrix.
    """
    tree = ClusterTree(filePath)
    leaves = tree.get_leaf_names()
    n_leaves = len(leaves)

    # Leaf name -> row/column position in the distance matrix.
    idx_dict = {leaf: pos for pos, leaf in enumerate(leaves)}
    if len(idx_dict) != n_leaves:
        raise ValueError(
            'Leaf names must be unique to build a distance matrix.'
        )

    # Square, symmetric matrix of leaf-to-leaf distances; the diagonal stays
    # zero because combinations() never pairs a leaf with itself.
    dmat = np.zeros((n_leaves, n_leaves))
    for leaf1, leaf2 in combinations(leaves, 2):
        d = tree.get_distance(leaf1, leaf2)
        row, col = idx_dict[leaf1], idx_dict[leaf2]
        dmat[row, col] = dmat[col, row] = d

    # linkage() is fed the condensed (1-D) form produced by squareform(); per
    # the SciPy docs ``metric`` only matters for raw observation vectors, it
    # is kept here so the call stays as it was.
    condensed = scipy.spatial.distance.squareform(dmat)
    schlink = sch.linkage(condensed, method='average', metric='euclidean')
    return schlink