Exemplo n.º 1
0
def simulation_h2m(msc, num_iters=len(msc)):
    leaf2clusters = trees.bottomup2topdown_tree_converter(msc)
    results = {}  #{index-name: list of results}
    msc2 = msc
    for i in xrange(0, num_iters):
        print "Iteration", i, "out of", num_iters
        leaf2clusters2 = trees.bottomup2topdown_tree_converter(msc2)
        indexes_dict = get_indexes_dict(leaf2clusters, leaf2clusters2,
                                        bonding_calc, membership_calc,
                                        membership_bonding,
                                        only_fast_simindexes)
        #print indexes_dict
        for id, val in indexes_dict.iteritems():
            results[id] = results.get(id, []) + [val]

        msc3 = []
        nummodifications = 0
        for h in msc2:
            if len(h) == 1 or nummodifications >= MAX_MODIFICATIONS:
                msc3.append(h)
            else:
                new_h = []
                for m in h:
                    new_h.extend(m)
                    msc3.append([new_h])
                nummodifications = nummodifications + 1
        #print msc3
        msc2 = msc3
    return results
def simulation_h2m(msc, num_iters = len(msc)):
    leaf2clusters = trees.bottomup2topdown_tree_converter(msc)
    results = {} #{index-name: list of results}
    msc2 = msc                    
    for i in xrange(0,num_iters):
        print "Iteration",i,"out of",num_iters
        leaf2clusters2 = trees.bottomup2topdown_tree_converter(msc2)                
        indexes_dict = get_indexes_dict(leaf2clusters, leaf2clusters2, bonding_calc, membership_calc, membership_bonding, only_fast_simindexes)
        #print indexes_dict
        for id, val in indexes_dict.iteritems():
            results[id] = results.get(id,[])+[val]
        
        msc3 = []
        nummodifications = 0
        for h in msc2:
            if len(h) == 1 or nummodifications >= MAX_MODIFICATIONS:
                msc3.append(h)
            else:          
                new_h = []      
                for m in h:
                    new_h.extend(m)
                    msc3.append([new_h])
                nummodifications = nummodifications + 1
        #print msc3
        msc2 = msc3 
    return results
def flat_comparision(msc):
    leaf2clusters = trees.bottomup2topdown_tree_converter(msc)
    print "-------------------------------------------------------"
    print "Number of nodes at H level:",len(msc)      
    leaves = []
    for h in msc:
        for m in h:
            for l in m:
                leaves.append(l)
    print 'Extracted leaves:',str(leaves)[:200],"..."
    print "-------------------------------------------------------"
    msc2 = [[leaves]]
    leaf2clusters2 = trees.bottomup2topdown_tree_converter(msc2)    
    print "For tree build of single leaves:",get_indexes_dict(leaf2clusters, leaf2clusters2, bonding_calc, membership_calc, membership_bonding, only_fast_simindexes)
Exemplo n.º 4
0
def flat_comparision(msc):
    leaf2clusters = trees.bottomup2topdown_tree_converter(msc)
    print "-------------------------------------------------------"
    print "Number of nodes at H level:", len(msc)
    leaves = []
    for h in msc:
        for m in h:
            for l in m:
                leaves.append(l)
    print 'Extracted leaves:', str(leaves)[:200], "..."
    print "-------------------------------------------------------"
    msc2 = [[leaves]]
    leaf2clusters2 = trees.bottomup2topdown_tree_converter(msc2)
    print "For tree build of single leaves:", get_indexes_dict(
        leaf2clusters, leaf2clusters2, bonding_calc, membership_calc,
        membership_bonding, only_fast_simindexes)
def generate_3level_tree(sim_matrix_l, clustering_l, similarity_aggregator_m, clustering_m):
    """Returns a 3level tree generated from the similarity matrix sim_matrix_l.

    Steps (each one is reported through logging.info):
      1. cluster the L level:  assignment_l = clustering_l(sim_matrix_l);
      2. aggregate sim_matrix_l to the M level with
         sim_matrix.aggregate_similarity_matrix_a and similarity_aggregator_m;
      3. cluster the M level:  assignment_m = clustering_m(aggregated matrix);
      4. build the tree with trees.build_3level_tree(assignment_l, assignment_m).

    Returns a pair (leaf2clusters, tree), where leaf2clusters is the tree
    passed through trees.bottomup2topdown_tree_converter.
    """
    log = logging.info

    log("[generate_3level_tree] Clustering L-level (xxyzz) (method:"+str(clustering_l)+")...")
    assignment_l = clustering_l(sim_matrix_l)
    log("[generate_3level_tree] assignment_l = "+str(assignment_l)[:200])

    log("[generate_3level_tree] Aggregating similarity matrix on M-level (aggregator:"+str(similarity_aggregator_m)+")...")
    sim_matrix_m = sim_matrix.aggregate_similarity_matrix_a(sim_matrix_l, assignment_l, similarity_aggregator_m)
    log("[generate_3level_tree]  sim_matrix_m of size "+str(len(sim_matrix_m))+"x"+str(len(sim_matrix_m[0])))
    log("[generate_3level_tree] \n"+str(numpy.array(sim_matrix_m))[:500])

    log("[generate_3level_tree] Clustering M-level (xxy) (method:"+str(clustering_m)+")...")
    assignment_m = clustering_m(sim_matrix_m)
    log("[generate_3level_tree] assignment_m = "+str(assignment_m)[:200])

    log("[generate_3level_tree] Building 3level tree with assignment_l and assignment_m")
    new_tree = trees.build_3level_tree(assignment_l, assignment_m)
    return trees.bottomup2topdown_tree_converter(new_tree), new_tree
def B_using_tree(tree, bonding_calc=lambda common_path_fraction: common_path_fraction):
    """Generates bonding matrix for given tree.

    tree - description of a tree (given as a list of lists of lists...)
    bonding_calc - function forwarded unchanged to B_using_tree_l2c
                   (identity by default).
    The tree is converted with trees.bottomup2topdown_tree_converter and the
    actual work is delegated to B_using_tree_l2c; see its documentation.
    Sample use:
    >>> B_using_tree([ [[['a','b'], ['c']] , [['d','e','f'],['g','h']]], [[['x']],[['y']]] ], bonding_calc = lambda common_path_fraction: common_path_fraction*4.0) == [[4, 3, 2, 1, 1, 1, 1, 1, 0, 0], [3, 4, 2, 1, 1, 1, 1, 1, 0, 0], [2, 2, 4, 1, 1, 1, 1, 1, 0, 0], [1, 1, 1, 4, 3, 3, 2, 2, 0, 0], [1, 1, 1, 3, 4, 3, 2, 2, 0, 0], [1, 1, 1, 3, 3, 4, 2, 2, 0, 0], [1, 1, 1, 2, 2, 2, 4, 3, 0, 0], [1, 1, 1, 2, 2, 2, 3, 4, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 4, 1], [0, 0, 0, 0, 0, 0, 0, 0, 1, 4]]
    True
    """
    return B_using_tree_l2c(trees.bottomup2topdown_tree_converter(tree),
                            bonding_calc)
def M_dictionary(tree, membership_calc=lambda common_levels: common_levels):
    """Generates membership dictionary{leaf:membership-vector} for given tree.

    tree - description of a tree (given as a list of lists of lists...)
    membership_calc - function forwarded unchanged to M_dictionary_l2c
                      (identity by default).
    The tree is first converted with trees.bottomup2topdown_tree_converter;
    for additional documentation see: M_dictionary_l2c.
    Sample use:
    >>> sorted(list(M_dictionary([ [[['a','b'], ['c']] , [['d','e','f'],['g','h']]], [[['x']],[['y']]] ]).iteritems()))
    [('a', [3, 2, 1, 1, 0, 0]), ('b', [3, 2, 1, 1, 0, 0]), ('c', [2, 3, 1, 1, 0, 0]), ('d', [1, 1, 3, 2, 0, 0]), ('e', [1, 1, 3, 2, 0, 0]), ('f', [1, 1, 3, 2, 0, 0]), ('g', [1, 1, 2, 3, 0, 0]), ('h', [1, 1, 2, 3, 0, 0]), ('x', [0, 0, 0, 0, 3, 1]), ('y', [0, 0, 0, 0, 1, 3])]
    """
    return M_dictionary_l2c(trees.bottomup2topdown_tree_converter(tree),
                            membership_calc)
def M_dictionary(tree, membership_calc=lambda common_levels: common_levels):
    """Generates membership dictionary{leaf:membership-vector} for given tree.

    tree - description of a tree (given as a list of lists of lists...)
    membership_calc - passed through to M_dictionary_l2c together with the
                      converted tree; defaults to the identity function.
    For additional documentation see: M_dictionary_l2c.
    Sample use:
    >>> sorted(list(M_dictionary([ [[['a','b'], ['c']] , [['d','e','f'],['g','h']]], [[['x']],[['y']]] ]).iteritems()))
    [('a', [3, 2, 1, 1, 0, 0]), ('b', [3, 2, 1, 1, 0, 0]), ('c', [2, 3, 1, 1, 0, 0]), ('d', [1, 1, 3, 2, 0, 0]), ('e', [1, 1, 3, 2, 0, 0]), ('f', [1, 1, 3, 2, 0, 0]), ('g', [1, 1, 2, 3, 0, 0]), ('h', [1, 1, 2, 3, 0, 0]), ('x', [0, 0, 0, 0, 3, 1]), ('y', [0, 0, 0, 0, 1, 3])]
    """
    # Output of the converter, handed straight to the *_l2c implementation.
    converted = trees.bottomup2topdown_tree_converter(tree)
    membership = M_dictionary_l2c(converted, membership_calc)
    return membership
def compare_to_random_tree(msc_leaf2clusters,
                           bonding_calc, membership_calc, membership_bonding,
                           only_fast_calculations=False):
    """Compares the given tree with a random tree over the same leaves.

    msc_leaf2clusters - converted reference tree; iterating over it must
                        yield its leaves (list() is taken of it).
    bonding_calc, membership_calc, membership_bonding, only_fast_calculations
                      - forwarded unchanged to tree_distance.get_indexes_dict.

    The random tree comes from get_random_tree2(leaves), which also supplies
    the two numbers returned here as num_l and num_m.

    Returns a tuple (num_l, num_m, indexes_dict).
    """
    rand_tree, num_l, num_m = get_random_tree2(list(msc_leaf2clusters))
    rand_leaf2clusters = trees.bottomup2topdown_tree_converter(rand_tree)

    indexes_dict = tree_distance.get_indexes_dict(
        msc_leaf2clusters, rand_leaf2clusters,
        bonding_calc, membership_calc, membership_bonding,
        only_fast_calculations)
    return (num_l, num_m, indexes_dict)
def _comparision_report_(T, T2):
    """Prints comparsion results for two trees: T and T2."""
    print "------------------------------------------------------"
    print "Tree1:", T
    print "Tree2:", T2

    bonding_calc = lambda common_path_fraction: common_path_fraction
    membership_calc = lambda common_levels: common_levels / 2.0
    membership_bonding = angular_bonding

    leaf2clusters = trees.bottomup2topdown_tree_converter(T)
    leaf2clusters2 = trees.bottomup2topdown_tree_converter(T2)

    indexes_dict = get_indexes_dict(leaf2clusters, leaf2clusters2,
                                    bonding_calc, membership_calc,
                                    membership_bonding, False)
    print indexes_dict

    ####################################################
    return
    print "Multilabelling example:---------------"
    M1 = [[0.67, 0.67, 0.33, 0.33, 0.67, 0.00],
          [0.33, 0.33, 0.67, 0.67, 0.33, 0.00],
          [0.00, 0.00, 0.00, 0.00, 0.00, 0.67]]
    B1 = B_using_membership(M1)

    M2 = [[0.33, 0.67, 0.33, 0.00, 0.00, 0.00],
          [0.33, 0.33, 0.67, 0.67, 0.33, 0.00],
          [0.00, 0.00, 0.00, 0.33, 0.67, 0.67]]
    B2 = B_using_membership(M2)

    M3 = [[0.33, 0.67, 0.67, 0.33, 0.33, 0.00],
          [0.33, 0.33, 0.67, 0.67, 0.33, 0.00],
          [0.00, 0.00, 0.00, 0.33, 0.67, 0.67]]
    B3 = B_using_membership(M3)

    print "HRI(M1,M2)", (1.0 - H_distance(B1, B2))
    print "HRI(M2,M3)", (1.0 - H_distance(B2, B3))
    print "HRI(M1,M3)", (1.0 - H_distance(B1, B3))
def _comparision_report_(T,T2):
    """Prints comparsion results for two trees: T and T2."""
    print "------------------------------------------------------"
    print "Tree1:",T
    print "Tree2:",T2

    bonding_calc = lambda common_path_fraction: common_path_fraction
    membership_calc = lambda common_levels: common_levels/2.0
    membership_bonding = angular_bonding
    
    leaf2clusters = trees.bottomup2topdown_tree_converter(T)
    leaf2clusters2 = trees.bottomup2topdown_tree_converter(T2)
    
    indexes_dict = get_indexes_dict(leaf2clusters, leaf2clusters2, bonding_calc, membership_calc, membership_bonding, False)
    print indexes_dict
    
    ####################################################
    return
    print "Multilabelling example:---------------"
    M1 = [[0.67,0.67,0.33,0.33,0.67,0.00],
          [0.33,0.33,0.67,0.67,0.33,0.00],
          [0.00,0.00,0.00,0.00,0.00,0.67]]
    B1 = B_using_membership(M1)

    M2 = [[0.33,0.67,0.33,0.00,0.00,0.00],
          [0.33,0.33,0.67,0.67,0.33,0.00],
          [0.00,0.00,0.00,0.33,0.67,0.67]]
    B2 = B_using_membership(M2)

    M3 = [[0.33,0.67,0.67,0.33,0.33,0.00],
          [0.33,0.33,0.67,0.67,0.33,0.00],
          [0.00,0.00,0.00,0.33,0.67,0.67]]
    B3 = B_using_membership(M3)
    
    print "HRI(M1,M2)",(1.0-H_distance(B1,B2))
    print "HRI(M2,M3)",(1.0-H_distance(B2,B3))
    print "HRI(M1,M3)",(1.0-H_distance(B1,B3))
def generate_3level_tree(sim_matrix_l, clustering_l, similarity_aggregator_m,
                         clustering_m):
    """Returns 3level tree generated using similarity matrix=sim_matrix_l.

    sim_matrix_l - similarity matrix handed to clustering_l.
    clustering_l - callable producing the L-level assignment from sim_matrix_l.
    similarity_aggregator_m - aggregator passed to
        sim_matrix.aggregate_similarity_matrix_a to obtain the M-level matrix.
    clustering_m - callable producing the M-level assignment from that matrix.

    Progress is reported with logging.info. Returns the pair
    (leaf2clusters, tree): the tree built by trees.build_3level_tree and its
    conversion by trees.bottomup2topdown_tree_converter.
    """
    # --- L level ---------------------------------------------------------
    l_method = str(clustering_l)
    logging.info("[generate_3level_tree] Clustering L-level (xxyzz) (method:" +
                 l_method + ")...")
    assignment_l = clustering_l(sim_matrix_l)
    l_preview = str(assignment_l)[:200]
    logging.info("[generate_3level_tree] assignment_l = " + l_preview)

    # --- aggregation to the M level ----------------------------------------
    aggregator_name = str(similarity_aggregator_m)
    logging.info(
        "[generate_3level_tree] Aggregating similarity matrix on M-level (aggregator:"
        + aggregator_name + ")...")
    sim_matrix_m = sim_matrix.aggregate_similarity_matrix_a(
        sim_matrix_l, assignment_l, similarity_aggregator_m)
    size_text = str(len(sim_matrix_m)) + "x" + str(len(sim_matrix_m[0]))
    logging.info("[generate_3level_tree]  sim_matrix_m of size " + size_text)
    matrix_preview = str(numpy.array(sim_matrix_m))[:500]
    logging.info("[generate_3level_tree] \n" + matrix_preview)

    # --- M level -----------------------------------------------------------
    m_method = str(clustering_m)
    logging.info("[generate_3level_tree] Clustering M-level (xxy) (method:" +
                 m_method + ")...")
    assignment_m = clustering_m(sim_matrix_m)
    m_preview = str(assignment_m)[:200]
    logging.info("[generate_3level_tree] assignment_m = " + m_preview)

    # --- tree ----------------------------------------------------------------
    logging.info(
        "[generate_3level_tree] Building 3level tree with assignment_l and assignment_m"
    )
    new_tree = trees.build_3level_tree(assignment_l, assignment_m)
    new_leaf2clusters = trees.bottomup2topdown_tree_converter(new_tree)
    return new_leaf2clusters, new_tree
Exemplo n.º 13
0
def get_random_tree_leaf2clusters(leaves, minpow = 0.25, maxpow = 0.75):
    """Builds a random tree and returns it together with its converted form.

    leaves, minpow, maxpow - forwarded unchanged to get_random_tree
                             (see: get_random_tree).

    Returns a pair (leaf2clusters, tree), where leaf2clusters is the tree
    passed through trees.bottomup2topdown_tree_converter.
    """
    tree = get_random_tree(leaves, minpow, maxpow)
    return trees.bottomup2topdown_tree_converter(tree), tree
Exemplo n.º 14
0
def self_comparision(msc):
    leaf2clusters = trees.bottomup2topdown_tree_converter(msc)
    print "-------------------------------------------------------"
    print "Tree compared to itself:", get_indexes_dict(
        leaf2clusters, leaf2clusters, bonding_calc, membership_calc,
        membership_bonding, only_fast_simindexes)
def self_comparision(msc):    
    leaf2clusters = trees.bottomup2topdown_tree_converter(msc)
    print "-------------------------------------------------------"
    print "Tree compared to itself:",get_indexes_dict(leaf2clusters, leaf2clusters, bonding_calc, membership_calc, membership_bonding, only_fast_simindexes)
Exemplo n.º 16
0
        for msc in msc_codes:
            #print msc,"->",(not VALID_LEAF_PATTERN_RE.match(msc) is None)
            if not VALID_LEAF_PATTERN_RE.match(msc) is None:
                msc2count[msc] = msc2count.get(msc, 0)+1        
    print "Filtering for with MIN_COUNT_MSC:",MIN_COUNT_MSC," out of", sum(msc2count.values())
    msc2count = dict((msc,count) for msc,count in msc2count.iteritems() if count>=MIN_COUNT_MSC)
    print "Building mapping msc2ix"
    msc2ix = dict((msc,ix) for ix,msc in enumerate(msc2count))
    ix2msc = dict((ix,msc) for msc,ix in msc2ix.iteritems())
    leaves = list( msc2ix )
    num_leaves = len(leaves)
    print "Building MSC tree out of", num_leaves, "leaves"
    msc_tree = trees.build_msctree(msc2ix.keys(), msc2ix)
    #print str(trees.map_tree_leaves(msc_tree, ix2msc))[:400] 

    msc_leaf2clusters = trees.bottomup2topdown_tree_converter(msc_tree)
                        
                    
    print "Random trees..."
    results = {} #{index-name: list of results}
    start = time.clock()
    for i in xrange(NUM_TRIES):
        print "",(time.clock()-start),i,"out of",NUM_TRIES,
        (num_l, num_m, indexes_dict) = compare_to_random_tree(msc_leaf2clusters, \
                                                              bonding_calc, membership_calc, membership_bonding,\
                                                              only_fast_calculations)
        indexes_dict["num_l"] = num_l
        indexes_dict["num_m"] = num_m
        for id, val in indexes_dict.iteritems():
            results[id] = results.get(id,[])+[val]
    print "Results:",results