Python KMeans.get_weighted_template示例

def run_test(fileName, max_k):
    cache_dir = './cache'
    D = 2.
    T = 3.
    L = 1.
    
    print >> sys.stderr, "FILE: ", fileName
    print fileName

    host, paras, phi = newickFormatReader.getInput(fileName)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
        f = open('%s/README' % cache_dir, 'w')
        f.write('This directory holds a cache of reconciliation graph for the TreeLife data set')
        f.close()

    cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1])
    recon_count_location = '%s/%s.count' % (cache_dir, os.path.split(fileName)[1])
    if not(os.path.isfile(cache_location)) or not(os.path.isfile(recon_count_location)):
        print >> sys.stderr, 'A reconciliation graph has not been built yet for this newick file'
        print >> sys.stderr, 'Doing so now and caching it in {%s}...' % cache_location

        DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
        f = open(cache_location, 'w+')
        g = open(recon_count_location, 'w+')
        f.write(repr(DictGraph))
        g.write(str(numRecon))
        f.close()
        g.close()

    print >> sys.stderr, 'Loading reonciliation graph from cache'
    f = open(cache_location)
    g = open(recon_count_location)
    DictGraph = eval(f.read())
    numRecon = float(g.read())
    f.close()
    g.close()

    ## Only consider running algorithm for reconciliations with more than 
    # threshold MPRs
    if (numRecon < recon_threshold):
        print >> sys.stderr, 'Too few reconciliations: ', numRecon
        return 
    else:
        print >> sys.stderr, 'Reconciliation Count: ', numRecon



    scoresList, dictReps = Greedy.Greedy(DictGraph, paras)

    print >> sys.stderr, 'Found cluster representatives using point-collecting'

    graph = ReconGraph.ReconGraph(DictGraph)
    setReps = [ReconGraph.dictRecToSetRec(graph, dictRep) for dictRep in dictReps]
    random.seed(0)
    extra_reps = [KMeans.get_template(graph) for i in xrange(max_k)]

    representatives = setReps + extra_reps

    print >> sys.stderr, 'Starting K Means algorithm ... '
    print >> sys.stderr, 'Printing Average and Maximum cluster radius at each step'

    for i in xrange(1, max_k + 1):
        # print 'k = %d' % i
        KMeans.k_means(graph, 10, i, 0, representatives[:i])
        # KMeans.k_means(graph, 10, i, seed, None)
        dist_sum = 0
        n = 10
        for _ in xrange(n):
            reps = [KMeans.get_weighted_template(graph) for _ in xrange(i)]
            average, maximum = KMeans.cluster_quality(graph, reps)
            dist_sum += average
            gc.collect()
        print float(dist_sum) / n

示例#2

显示文件

文件： k_centers.py 项目： jeansung/ClusteringAlgorithmsDTL

def run_test(fileName, max_k):
    cache_dir = './cache'
    D = 2.
    T = 3.
    L = 1.

    print >> sys.stderr, "FILE: ", fileName
    print fileName

    host, paras, phi = newickFormatReader.getInput(fileName)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
        f = open('%s/README' % cache_dir, 'w')
        f.write('This directory holds a cache of reconciliation graph for the TreeLife data set')
        f.close()

    cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1])
    recon_count_location = '%s/%s.count' % (cache_dir, os.path.split(fileName)[1])
    if not(os.path.isfile(cache_location)) or not(os.path.isfile(recon_count_location)):
        print >> sys.stderr, 'A reconciliation graph has not been built yet for this newick file'
        print >> sys.stderr, 'Doing so now and caching it in {%s}...' % cache_location

        DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
        f = open(cache_location, 'w+')
        g = open(recon_count_location, 'w+')
        f.write(repr(DictGraph))
        g.write(str(numRecon))
        f.close()
        g.close()

    print >> sys.stderr, 'Loading reonciliation graph from cache'
    f = open(cache_location)
    g = open(recon_count_location)
    DictGraph = eval(f.read())
    numRecon = float(g.read())
    f.close()
    g.close()

    
    
    ## Only consider running algorithm for reconciliations with more than 
    # threshold MPRs
    if (numRecon < recon_threshold):
        print >> sys.stderr, 'Too few reconciliations: ', numRecon
        return 
    else:
        print >> sys.stderr, 'Reconciliation Count: ', numRecon



    scoresList, dictReps = Greedy.Greedy(DictGraph, paras)
    graph = ReconGraph.ReconGraph(DictGraph)
    representatives = [ReconGraph.dictRecToSetRec(graph, dictReps[0])]

    ## Debug info
    ## Modifies the graph 
    ## Checking for the case when there is an error in likelihood 
    print >> sys.stderr, "== Checking for likelihoods over 1 =="
    found = False 
    for key in DictGraph.keys():
        children = DictGraph[key]
        for child in children[:-1]:
            if child[-1] > 1:
                # Attempt to round to fix large float math errors
                roundedValue = round(child[-1])
                if roundedValue != 1.0:
                    print >> sys.stderr, "ERR FOUND: ", key, child 
                    found = True 
                
    if not(found):
        print >> sys.stderr, "NO ERR(s)"
    print >> sys.stderr, "== End of over 1 checks. =="



    print >> sys.stderr, 'Starting K-centers algorithm ... '
    for i in xrange(2, max_k + 2):
        d, newrep = maximize(graph,representatives)
        if not all(d_i > 0 for d_i in d):
            print >> sys.stderr, "Distance vector contains 0", d 
            break

        print i-1, min(d),
        representatives.append(newrep)
        dist_sum = 0
        n = 10
        for _ in xrange(n):
            reps = [KMeans.get_weighted_template(graph) for _ in xrange(i-1)]
            dist_sum += min_d(maximize(graph,reps))
        print float(dist_sum) / n

    print  >> sys.stderr, "Finished k centers algorithm ..."