# Split the rows of `dis` into two groups with k-means.
# NOTE(review): `dis` is presumably a pandas DataFrame (it exposes `.values`
# and `.index`) defined earlier in the file -- confirm its contents.
# `precompute_distances` is intentionally not passed: scikit-learn deprecated
# it in 0.23 and removed it in 1.0, where passing it raises TypeError.
kmeans = KMeans(n_clusters=2, random_state=0, tol=1e-10).fit(dis.values)

# Map each renamed row label to its cluster label (stored as a one-element
# list holding the label as a string, i.e. '0' or '1').
id2info = defaultdict(list)
for idx, seq_id in enumerate(dis.index):
    # New name = converted prefix (text before the first '_') + '_' + the
    # original label.
    new_name = convert_genome_ID_rev(seq_id.split('_')[0]) + '_' + seq_id
    id2info[new_name] = [str(kmeans.labels_[idx])]

# Wildcard import kept as-is: code outside this section may rely on other
# names it provides. `to_binary_shape` is expected to come from here.
from api_tools.itol_func import *

# Render the cluster assignment with the project helper and write it out.
# NOTE(review): judging by the module and path names this is an iTOL
# annotation file -- confirm against `to_binary_shape`.
text = to_binary_shape(id2info, {'1': {}, '0': {}})
with open('../itol_txt/separate_tmp.txt', 'w') as f1:
    f1.write(text)

# Load the tree from `intree` (defined outside this section).
t = PhyloTree(intree)
# Optional midpoint rooting, currently disabled:
# t.set_outgroup(t.get_midpoint_outgroup())

# Derive each node's species from the part of its name before the first '_'.
t.set_species_naming_function(
    lambda node: convert_genome_ID_rev(node.name.split('_')[0]))
print(t.get_ascii(attributes=["name", "species"], show_internal=False))

# Collapse lineage-specific expansions, then enumerate the speciation trees
# and report how many leaves each one has.
t2 = t.collapse_lineage_specific_expansions()
ntrees, ndups, sptrees = t2.get_speciation_trees()
sptrees = list(sptrees)  # materialise so the trees can be iterated again later
print("Found %d species trees and %d duplication nodes" % (ntrees, ndups))
for spt in sptrees:
    print(len(spt.get_leaf_names()))