def simulation_h2m(msc, num_iters=len(msc)): leaf2clusters = trees.bottomup2topdown_tree_converter(msc) results = {} #{index-name: list of results} msc2 = msc for i in xrange(0, num_iters): print "Iteration", i, "out of", num_iters leaf2clusters2 = trees.bottomup2topdown_tree_converter(msc2) indexes_dict = get_indexes_dict(leaf2clusters, leaf2clusters2, bonding_calc, membership_calc, membership_bonding, only_fast_simindexes) #print indexes_dict for id, val in indexes_dict.iteritems(): results[id] = results.get(id, []) + [val] msc3 = [] nummodifications = 0 for h in msc2: if len(h) == 1 or nummodifications >= MAX_MODIFICATIONS: msc3.append(h) else: new_h = [] for m in h: new_h.extend(m) msc3.append([new_h]) nummodifications = nummodifications + 1 #print msc3 msc2 = msc3 return results
def simulation_h2m(msc, num_iters = len(msc)): leaf2clusters = trees.bottomup2topdown_tree_converter(msc) results = {} #{index-name: list of results} msc2 = msc for i in xrange(0,num_iters): print "Iteration",i,"out of",num_iters leaf2clusters2 = trees.bottomup2topdown_tree_converter(msc2) indexes_dict = get_indexes_dict(leaf2clusters, leaf2clusters2, bonding_calc, membership_calc, membership_bonding, only_fast_simindexes) #print indexes_dict for id, val in indexes_dict.iteritems(): results[id] = results.get(id,[])+[val] msc3 = [] nummodifications = 0 for h in msc2: if len(h) == 1 or nummodifications >= MAX_MODIFICATIONS: msc3.append(h) else: new_h = [] for m in h: new_h.extend(m) msc3.append([new_h]) nummodifications = nummodifications + 1 #print msc3 msc2 = msc3 return results
def flat_comparision(msc): leaf2clusters = trees.bottomup2topdown_tree_converter(msc) print "-------------------------------------------------------" print "Number of nodes at H level:",len(msc) leaves = [] for h in msc: for m in h: for l in m: leaves.append(l) print 'Extracted leaves:',str(leaves)[:200],"..." print "-------------------------------------------------------" msc2 = [[leaves]] leaf2clusters2 = trees.bottomup2topdown_tree_converter(msc2) print "For tree build of single leaves:",get_indexes_dict(leaf2clusters, leaf2clusters2, bonding_calc, membership_calc, membership_bonding, only_fast_simindexes)
def flat_comparision(msc): leaf2clusters = trees.bottomup2topdown_tree_converter(msc) print "-------------------------------------------------------" print "Number of nodes at H level:", len(msc) leaves = [] for h in msc: for m in h: for l in m: leaves.append(l) print 'Extracted leaves:', str(leaves)[:200], "..." print "-------------------------------------------------------" msc2 = [[leaves]] leaf2clusters2 = trees.bottomup2topdown_tree_converter(msc2) print "For tree build of single leaves:", get_indexes_dict( leaf2clusters, leaf2clusters2, bonding_calc, membership_calc, membership_bonding, only_fast_simindexes)
def generate_3level_tree(sim_matrix_l, clustering_l, similarity_aggregator_m, clustering_m):
    """Generate a 3-level tree from the similarity matrix `sim_matrix_l`.

    Steps: cluster `sim_matrix_l` with `clustering_l`, aggregate the matrix
    according to that assignment using `similarity_aggregator_m`, cluster the
    aggregated matrix with `clustering_m`, and build the tree from both
    assignments. Progress is reported through logging.info.

    Returns (leaf2clusters, tree) where leaf2clusters is the result of
    trees.bottomup2topdown_tree_converter applied to the new tree.
    """
    # L level: cluster the input matrix.
    logging.info("[generate_3level_tree] Clustering L-level (xxyzz) (method:"+str(clustering_l)+")...")
    l_assignment = clustering_l(sim_matrix_l)
    logging.info("[generate_3level_tree] assignment_l = "+str(l_assignment)[:200])

    # M level: aggregate similarities between the L-level clusters.
    logging.info("[generate_3level_tree] Aggregating similarity matrix on M-level (aggregator:"+str(similarity_aggregator_m)+")...")
    m_matrix = sim_matrix.aggregate_similarity_matrix_a(
        sim_matrix_l, l_assignment, similarity_aggregator_m)
    num_rows = str(len(m_matrix))
    num_cols = str(len(m_matrix[0]))
    logging.info("[generate_3level_tree] sim_matrix_m of size "+num_rows+"x"+num_cols)
    logging.info("[generate_3level_tree] \n"+str(numpy.array(m_matrix))[:500])

    # M level: cluster the aggregated matrix.
    logging.info("[generate_3level_tree] Clustering M-level (xxy) (method:"+str(clustering_m)+")...")
    m_assignment = clustering_m(m_matrix)
    logging.info("[generate_3level_tree] assignment_m = "+str(m_assignment)[:200])

    logging.info("[generate_3level_tree] Building 3level tree with assignment_l and assignment_m")
    tree = trees.build_3level_tree(l_assignment, m_assignment)
    return trees.bottomup2topdown_tree_converter(tree), tree
def B_using_tree(tree, bonding_calc=lambda common_path_fraction: common_path_fraction):
    """Generates bonding matrix for given tree.

    tree - description of a tree (given as a list of lists of lists...)
    bonding_calc - function applied to the common path fraction; identity
                   by default.

    The tree is converted with trees.bottomup2topdown_tree_converter and the
    result is handed to B_using_tree_l2c; see B_using_tree_l2c for additional
    documentation.

    Sample use:
    >>> B_using_tree([ [[['a','b'], ['c']] , [['d','e','f'],['g','h']]], [[['x']],[['y']]] ],
    ...     bonding_calc = lambda common_path_fraction: common_path_fraction*4.0) == [
    ...     [4, 3, 2, 1, 1, 1, 1, 1, 0, 0],
    ...     [3, 4, 2, 1, 1, 1, 1, 1, 0, 0],
    ...     [2, 2, 4, 1, 1, 1, 1, 1, 0, 0],
    ...     [1, 1, 1, 4, 3, 3, 2, 2, 0, 0],
    ...     [1, 1, 1, 3, 4, 3, 2, 2, 0, 0],
    ...     [1, 1, 1, 3, 3, 4, 2, 2, 0, 0],
    ...     [1, 1, 1, 2, 2, 2, 4, 3, 0, 0],
    ...     [1, 1, 1, 2, 2, 2, 3, 4, 0, 0],
    ...     [0, 0, 0, 0, 0, 0, 0, 0, 4, 1],
    ...     [0, 0, 0, 0, 0, 0, 0, 0, 1, 4]]
    True
    """
    return B_using_tree_l2c(trees.bottomup2topdown_tree_converter(tree), bonding_calc)
def M_dictionary(tree, membership_calc=lambda common_levels: common_levels):
    """Generates membership dictionary {leaf: membership-vector} for given tree.

    tree - description of a tree (given as a list of lists of lists...)
    membership_calc - function applied to the number of common levels;
                      identity by default.

    Delegates to M_dictionary_l2c after converting the tree with
    trees.bottomup2topdown_tree_converter; see M_dictionary_l2c for
    additional documentation.

    Sample use:
    >>> sorted(list(M_dictionary([ [[['a','b'], ['c']] , [['d','e','f'],['g','h']]], [[['x']],[['y']]] ]).iteritems()))
    [('a', [3, 2, 1, 1, 0, 0]), ('b', [3, 2, 1, 1, 0, 0]), ('c', [2, 3, 1, 1, 0, 0]), ('d', [1, 1, 3, 2, 0, 0]), ('e', [1, 1, 3, 2, 0, 0]), ('f', [1, 1, 3, 2, 0, 0]), ('g', [1, 1, 2, 3, 0, 0]), ('h', [1, 1, 2, 3, 0, 0]), ('x', [0, 0, 0, 0, 3, 1]), ('y', [0, 0, 0, 0, 1, 3])]
    """
    # Converted form: {leaf: descending-list-of-clusters}
    return M_dictionary_l2c(trees.bottomup2topdown_tree_converter(tree), membership_calc)
def M_dictionary(tree, membership_calc=lambda common_levels: common_levels):
    """Generates membership dictionary {leaf: membership-vector} for given tree.

    tree - description of a tree (given as a list of lists of lists...)
    membership_calc - callable forwarded unchanged to M_dictionary_l2c
                      (identity by default).

    For additional documentation see: M_dictionary_l2c.

    Sample use:
    >>> sorted(list(M_dictionary([ [[['a','b'], ['c']] , [['d','e','f'],['g','h']]], [[['x']],[['y']]] ]).iteritems()))
    [('a', [3, 2, 1, 1, 0, 0]), ('b', [3, 2, 1, 1, 0, 0]), ('c', [2, 3, 1, 1, 0, 0]), ('d', [1, 1, 3, 2, 0, 0]), ('e', [1, 1, 3, 2, 0, 0]), ('f', [1, 1, 3, 2, 0, 0]), ('g', [1, 1, 2, 3, 0, 0]), ('h', [1, 1, 2, 3, 0, 0]), ('x', [0, 0, 0, 0, 3, 1]), ('y', [0, 0, 0, 0, 1, 3])]
    """
    # top-down view of the tree: {leaf: descending-list-of-clusters}
    topdown = trees.bottomup2topdown_tree_converter(tree)
    membership = M_dictionary_l2c(topdown, membership_calc)
    return membership
def compare_to_random_tree(msc_leaf2clusters,
                           bonding_calc, membership_calc, membership_bonding,
                           only_fast_calculations=False):
    """Compare the given tree with a freshly generated random tree.

    msc_leaf2clusters - top-down tree description; iterating it yields the
                        leaves used to build the random tree.
    bonding_calc, membership_calc, membership_bonding, only_fast_calculations
                      - forwarded unchanged to tree_distance.get_indexes_dict.

    Returns (num_l, num_m, indexes_dict), where num_l and num_m are the second
    and third values returned by get_random_tree2 (presumably node counts on
    the L and M levels - verify against get_random_tree2).
    """
    rand_tree, num_l, num_m = get_random_tree2(list(msc_leaf2clusters))
    rand_leaf2clusters = trees.bottomup2topdown_tree_converter(rand_tree)
    indexes_dict = tree_distance.get_indexes_dict(
        msc_leaf2clusters, rand_leaf2clusters,
        bonding_calc, membership_calc, membership_bonding,
        only_fast_calculations)
    return (num_l, num_m, indexes_dict)
def _comparision_report_(T, T2): """Prints comparsion results for two trees: T and T2.""" print "------------------------------------------------------" print "Tree1:", T print "Tree2:", T2 bonding_calc = lambda common_path_fraction: common_path_fraction membership_calc = lambda common_levels: common_levels / 2.0 membership_bonding = angular_bonding leaf2clusters = trees.bottomup2topdown_tree_converter(T) leaf2clusters2 = trees.bottomup2topdown_tree_converter(T2) indexes_dict = get_indexes_dict(leaf2clusters, leaf2clusters2, bonding_calc, membership_calc, membership_bonding, False) print indexes_dict #################################################### return print "Multilabelling example:---------------" M1 = [[0.67, 0.67, 0.33, 0.33, 0.67, 0.00], [0.33, 0.33, 0.67, 0.67, 0.33, 0.00], [0.00, 0.00, 0.00, 0.00, 0.00, 0.67]] B1 = B_using_membership(M1) M2 = [[0.33, 0.67, 0.33, 0.00, 0.00, 0.00], [0.33, 0.33, 0.67, 0.67, 0.33, 0.00], [0.00, 0.00, 0.00, 0.33, 0.67, 0.67]] B2 = B_using_membership(M2) M3 = [[0.33, 0.67, 0.67, 0.33, 0.33, 0.00], [0.33, 0.33, 0.67, 0.67, 0.33, 0.00], [0.00, 0.00, 0.00, 0.33, 0.67, 0.67]] B3 = B_using_membership(M3) print "HRI(M1,M2)", (1.0 - H_distance(B1, B2)) print "HRI(M2,M3)", (1.0 - H_distance(B2, B3)) print "HRI(M1,M3)", (1.0 - H_distance(B1, B3))
def _comparision_report_(T,T2): """Prints comparsion results for two trees: T and T2.""" print "------------------------------------------------------" print "Tree1:",T print "Tree2:",T2 bonding_calc = lambda common_path_fraction: common_path_fraction membership_calc = lambda common_levels: common_levels/2.0 membership_bonding = angular_bonding leaf2clusters = trees.bottomup2topdown_tree_converter(T) leaf2clusters2 = trees.bottomup2topdown_tree_converter(T2) indexes_dict = get_indexes_dict(leaf2clusters, leaf2clusters2, bonding_calc, membership_calc, membership_bonding, False) print indexes_dict #################################################### return print "Multilabelling example:---------------" M1 = [[0.67,0.67,0.33,0.33,0.67,0.00], [0.33,0.33,0.67,0.67,0.33,0.00], [0.00,0.00,0.00,0.00,0.00,0.67]] B1 = B_using_membership(M1) M2 = [[0.33,0.67,0.33,0.00,0.00,0.00], [0.33,0.33,0.67,0.67,0.33,0.00], [0.00,0.00,0.00,0.33,0.67,0.67]] B2 = B_using_membership(M2) M3 = [[0.33,0.67,0.67,0.33,0.33,0.00], [0.33,0.33,0.67,0.67,0.33,0.00], [0.00,0.00,0.00,0.33,0.67,0.67]] B3 = B_using_membership(M3) print "HRI(M1,M2)",(1.0-H_distance(B1,B2)) print "HRI(M2,M3)",(1.0-H_distance(B2,B3)) print "HRI(M1,M3)",(1.0-H_distance(B1,B3))
def generate_3level_tree(sim_matrix_l, clustering_l, similarity_aggregator_m, clustering_m):
    """Returns a 3-level tree built from `sim_matrix_l`.

    sim_matrix_l            - similarity matrix of the L level.
    clustering_l            - callable: similarity matrix -> L-level assignment.
    similarity_aggregator_m - aggregator passed to
                              sim_matrix.aggregate_similarity_matrix_a.
    clustering_m            - callable: aggregated matrix -> M-level assignment.

    Every stage is logged with logging.info. The result is the pair
    (leaf2clusters, tree); leaf2clusters comes from
    trees.bottomup2topdown_tree_converter(tree).
    """
    logging.info("[generate_3level_tree] Clustering L-level (xxyzz) (method:" +
                 str(clustering_l) + ")...")
    assignment_l = clustering_l(sim_matrix_l)
    logging.info("[generate_3level_tree] assignment_l = " +
                 str(assignment_l)[:200])

    logging.info("[generate_3level_tree] Aggregating similarity matrix on M-level (aggregator:" +
                 str(similarity_aggregator_m) + ")...")
    aggregated = sim_matrix.aggregate_similarity_matrix_a(
        sim_matrix_l, assignment_l, similarity_aggregator_m)
    size_text = str(len(aggregated)) + "x" + str(len(aggregated[0]))
    logging.info("[generate_3level_tree] sim_matrix_m of size " + size_text)
    # Only the first 500 characters of the matrix dump are logged.
    logging.info("[generate_3level_tree] \n" +
                 str(numpy.array(aggregated))[:500])

    logging.info("[generate_3level_tree] Clustering M-level (xxy) (method:" +
                 str(clustering_m) + ")...")
    assignment_m = clustering_m(aggregated)
    logging.info("[generate_3level_tree] assignment_m = " +
                 str(assignment_m)[:200])

    logging.info(
        "[generate_3level_tree] Building 3level tree with assignment_l and assignment_m"
    )
    built_tree = trees.build_3level_tree(assignment_l, assignment_m)
    built_l2c = trees.bottomup2topdown_tree_converter(built_tree)
    return built_l2c, built_tree
def get_random_tree_leaf2clusters(leaves, minpow=0.25, maxpow=0.75):
    """Build a random tree and return it together with its top-down form.

    `leaves`, `minpow` and `maxpow` are passed straight to get_random_tree
    (see get_random_tree for their meaning).

    Returns (leaf2clusters, tree), where leaf2clusters is
    trees.bottomup2topdown_tree_converter(tree).
    """
    tree = get_random_tree(leaves, minpow, maxpow)
    return trees.bottomup2topdown_tree_converter(tree), tree
def self_comparision(msc): leaf2clusters = trees.bottomup2topdown_tree_converter(msc) print "-------------------------------------------------------" print "Tree compared to itself:", get_indexes_dict( leaf2clusters, leaf2clusters, bonding_calc, membership_calc, membership_bonding, only_fast_simindexes)
def self_comparision(msc): leaf2clusters = trees.bottomup2topdown_tree_converter(msc) print "-------------------------------------------------------" print "Tree compared to itself:",get_indexes_dict(leaf2clusters, leaf2clusters, bonding_calc, membership_calc, membership_bonding, only_fast_simindexes)
# Script fragment: builds an MSC tree out of sufficiently frequent codes and
# compares it against NUM_TRIES random trees.
# NOTE(review): msc_codes, msc2count and the *_calc settings are defined
# outside of this fragment.

# Count occurrences of every code accepted by VALID_LEAF_PATTERN_RE.
for msc in msc_codes:
    #print msc,"->",(not VALID_LEAF_PATTERN_RE.match(msc) is None)
    if not VALID_LEAF_PATTERN_RE.match(msc) is None:
        msc2count[msc] = msc2count.get(msc, 0)+1

# Keep only the codes seen at least MIN_COUNT_MSC times.
print "Filtering for with MIN_COUNT_MSC:",MIN_COUNT_MSC," out of", sum(msc2count.values())
msc2count = dict((msc,count) for msc,count in msc2count.iteritems() if count>=MIN_COUNT_MSC)

# Two-way mapping between codes and consecutive integer indexes.
print "Building mapping msc2ix"
msc2ix = dict((msc,ix) for ix,msc in enumerate(msc2count))
ix2msc = dict((ix,msc) for msc,ix in msc2ix.iteritems())
leaves = list( msc2ix )
num_leaves = len(leaves)

print "Building MSC tree out of", num_leaves, "leaves"
msc_tree = trees.build_msctree(msc2ix.keys(), msc2ix)
#print str(trees.map_tree_leaves(msc_tree, ix2msc))[:400]
msc_leaf2clusters = trees.bottomup2topdown_tree_converter(msc_tree)

print "Random trees..."
results = {} #{index-name: list of results}
start = time.clock()
for i in xrange(NUM_TRIES):
    # Trailing comma: the progress line is not terminated with a newline here.
    print "",(time.clock()-start),i,"out of",NUM_TRIES,
    (num_l, num_m, indexes_dict) = compare_to_random_tree(msc_leaf2clusters, \
                                            bonding_calc, membership_calc, membership_bonding,\
                                            only_fast_calculations)
    # Record num_l/num_m next to the indexes so they are collected the same way.
    indexes_dict["num_l"] = num_l
    indexes_dict["num_m"] = num_m
    for id, val in indexes_dict.iteritems():
        results[id] = results.get(id,[])+[val]
print "Results:",results