def run_test(fileName, max_k): cache_dir = './cache' D = 2. T = 3. L = 1. print >> sys.stderr, "FILE: ", fileName print fileName host, paras, phi = newickFormatReader.getInput(fileName) if not os.path.exists(cache_dir): os.makedirs(cache_dir) f = open('%s/README' % cache_dir, 'w') f.write('This directory holds a cache of reconciliation graph for the TreeLife data set') f.close() cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1]) recon_count_location = '%s/%s.count' % (cache_dir, os.path.split(fileName)[1]) if not(os.path.isfile(cache_location)) or not(os.path.isfile(recon_count_location)): print >> sys.stderr, 'A reconciliation graph has not been built yet for this newick file' print >> sys.stderr, 'Doing so now and caching it in {%s}...' % cache_location DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L) f = open(cache_location, 'w+') g = open(recon_count_location, 'w+') f.write(repr(DictGraph)) g.write(str(numRecon)) f.close() g.close() print >> sys.stderr, 'Loading reonciliation graph from cache' f = open(cache_location) g = open(recon_count_location) DictGraph = eval(f.read()) numRecon = float(g.read()) f.close() g.close() ## Only consider running algorithm for reconciliations with more than # threshold MPRs if (numRecon < recon_threshold): print >> sys.stderr, 'Too few reconciliations: ', numRecon return else: print >> sys.stderr, 'Reconciliation Count: ', numRecon scoresList, dictReps = Greedy.Greedy(DictGraph, paras) print >> sys.stderr, 'Found cluster representatives using point-collecting' graph = ReconGraph.ReconGraph(DictGraph) setReps = [ReconGraph.dictRecToSetRec(graph, dictRep) for dictRep in dictReps] random.seed(0) extra_reps = [KMeans.get_template(graph) for i in xrange(max_k)] representatives = setReps + extra_reps print >> sys.stderr, 'Starting K Means algorithm ... ' print >> sys.stderr, 'Printing Average and Maximum cluster radius at each step' for i in xrange(1, max_k + 1): # print 'k = %d' % i KMeans.k_means(graph, 10, i, 0, representatives[:i]) # KMeans.k_means(graph, 10, i, seed, None) dist_sum = 0 n = 10 for _ in xrange(n): reps = [KMeans.get_weighted_template(graph) for _ in xrange(i)] average, maximum = KMeans.cluster_quality(graph, reps) dist_sum += average gc.collect() print float(dist_sum) / n
def run_test(fileName, max_k): cache_dir = './cache' D = 2. T = 3. L = 1. print >> sys.stderr, "FILE: ", fileName print fileName host, paras, phi = newickFormatReader.getInput(fileName) if not os.path.exists(cache_dir): os.makedirs(cache_dir) f = open('%s/README' % cache_dir, 'w') f.write('This directory holds a cache of reconciliation graph for the TreeLife data set') f.close() cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1]) recon_count_location = '%s/%s.count' % (cache_dir, os.path.split(fileName)[1]) if not(os.path.isfile(cache_location)) or not(os.path.isfile(recon_count_location)): print >> sys.stderr, 'A reconciliation graph has not been built yet for this newick file' print >> sys.stderr, 'Doing so now and caching it in {%s}...' % cache_location DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L) f = open(cache_location, 'w+') g = open(recon_count_location, 'w+') f.write(repr(DictGraph)) g.write(str(numRecon)) f.close() g.close() print >> sys.stderr, 'Loading reonciliation graph from cache' f = open(cache_location) g = open(recon_count_location) DictGraph = eval(f.read()) numRecon = float(g.read()) f.close() g.close() ## Only consider running algorithm for reconciliations with more than # threshold MPRs if (numRecon < recon_threshold): print >> sys.stderr, 'Too few reconciliations: ', numRecon return else: print >> sys.stderr, 'Reconciliation Count: ', numRecon scoresList, dictReps = Greedy.Greedy(DictGraph, paras) graph = ReconGraph.ReconGraph(DictGraph) representatives = [ReconGraph.dictRecToSetRec(graph, dictReps[0])] ## Debug info ## Modifies the graph ## Checking for the case when there is an error in likelihood print >> sys.stderr, "== Checking for likelihoods over 1 ==" found = False for key in DictGraph.keys(): children = DictGraph[key] for child in children[:-1]: if child[-1] > 1: # Attempt to round to fix large float math errors roundedValue = round(child[-1]) if roundedValue != 1.0: print >> sys.stderr, "ERR FOUND: ", key, child found = True if not(found): print >> sys.stderr, "NO ERR(s)" print >> sys.stderr, "== End of over 1 checks. ==" print >> sys.stderr, 'Starting K-centers algorithm ... ' for i in xrange(2, max_k + 2): d, newrep = maximize(graph,representatives) if not all(d_i > 0 for d_i in d): print >> sys.stderr, "Distance vector contains 0", d break print i-1, min(d), representatives.append(newrep) dist_sum = 0 n = 10 for _ in xrange(n): reps = [KMeans.get_weighted_template(graph) for _ in xrange(i-1)] dist_sum += min_d(maximize(graph,reps)) print float(dist_sum) / n print >> sys.stderr, "Finished k centers algorithm ..."