def computeKMoutliers(synthD):
    """Run ``kmo.kmeansOutliers`` five times on every synthetic data set.

    Each entry of ``synthD`` is loaded with ``readSynthetic``.  For every one
    of the five repetitions the summed ``kmo.cost`` of the resulting
    clustering is appended to ``sd.costs``.  Afterwards the data set is
    printed with ``printSD`` and folded into the running result with
    ``addAnswer``.

    Args:
        synthD: iterable of items accepted by ``readSynthetic``.

    Returns:
        The value produced by chaining ``addAnswer`` over all data sets,
        starting from an empty list.
    """
    num = 0
    stats = []
    for f in synthD:
        # Load and parse one synthetic data set.
        sd = readSynthetic(f)
        print("Iteration:", num)
        num += 1

        # The reference cost depends only on ``sd``, so it is computed once
        # per data set instead of once per repetition (the sibling drivers
        # in this file do the same).  Assumes compute_phi_star is
        # deterministic for a given ``sd``.
        phi_star = compute_phi_star(sd)

        for _ in range(5):
            # NOTE(review): three values are unpacked here, whereas
            # computeKMOutliersLloyd unpacks four from the same call --
            # confirm which arity matches the current kmo.kmeansOutliers.
            kcent, cid, _dist = kmo.kmeansOutliers(
                sd.data, phi_star, sd.z, sd.k)
            kmo_cost, _index_list = kmo.cost(sd.data, cid, kcent, sd.z)
            # Record the total cost of this repetition.
            sd.costs.append(np.sum(kmo_cost))

        # Example of attaching extra per-data-set stats (e.g. timing); per
        # the original author's note, the matching headers live at the top
        # of the file.
        sd.extrastats = [0, num, num * num]
        printSD(sd)
        stats = addAnswer(stats, sd)
    return stats
def computeKMOutliersLloyd(synthD):
    """Seed with ``kmo.kmeansOutliers`` and refine with ``lloyd.LloydOut``.

    For every data set in ``synthD`` and every multiplier in the module-level
    ``phistarprop``, the seeding + refinement pipeline is repeated ten times.
    The ``kmo.cost2`` value of each refined solution is appended to
    ``sd.costs``; the precision/recall values returned by ``LloydOut`` are
    collected and summarised in ``sd.extrastats``.  After each configuration
    the data set is printed, merged into the result through ``addAnswer``,
    and ``sd.costs`` is reset.

    Args:
        synthD: iterable of items accepted by ``readSynthetic``.

    Returns:
        The value produced by chaining ``addAnswer`` over all
        configurations, starting from an empty list.
    """
    file_no = 0
    stats = []
    for f in synthD:
        # Load one data set and cache its reference cost on the object.
        sd = readSynthetic(f)
        sd.phistar = compute_phi_star(sd)
        print("-------------------------\nIteration:", file_no,
              "\n-------------------------")
        file_no += 1

        for rp in phistarprop:
            # range(1) keeps the "extra centers" sweep in place with a
            # single setting (no extra centers).
            for extra in range(1):
                sd.runphi = rp
                sd.runk = sd.k + extra
                precisions, recalls = [], []

                for _ in range(10):
                    centers, cid, dist, wins = kmo.kmeansOutliers(
                        sd.data, sd.phistar * sd.runphi, sd.z, sd.runk)

                    # Indices k .. k+z-1, rebuilt for every run.
                    # presumably the rows holding the planted outliers --
                    # TODO confirm against the data generator.
                    zind = list(range(sd.k, sd.k + sd.z))

                    # NOTE(review): five values are unpacked here, while
                    # computeKMLSCoreset unpacks six from LloydOut --
                    # confirm which arity is current.
                    ans, cid, wins, prec, rec = lloyd.LloydOut(
                        sd.data, centers, sd.runk, sd.z, 1, 100, zind)

                    # Cost of the refined centers.
                    sd.costs.append(kmo.cost2(sd.data, ans, int(sd.z)))
                    precisions.append(prec)
                    recalls.append(rec)

                sd.precs = precisions
                sd.recs = recalls
                # Extra per-configuration stats; per the original author's
                # note, the matching headers live at the top of the file.
                sd.extrastats = [
                    sd.phistar,
                    mean(np.array(precisions)),
                    max(precisions),
                    mean(np.array(recalls)),
                    max(recalls),
                ]
                print(sd.phistar)
                printSD(sd)
                stats = addAnswer(stats, sd)
                # Start the next configuration with an empty cost list.
                sd.costs = []
    return stats
def computeKMLS(synthD):
    """Run ``ls.lsOut`` on a ``kmpp.kmeanspp`` sample of each data set.

    For every data set in ``synthD`` the number of centers is swept over
    ``sd.k + j`` for ``j`` in ``range(int(sd.k / 2))``.  Each setting draws
    ``2 * (sd.k + sd.z)`` points with ``kmpp.kmeanspp``, runs ``ls.lsOut`` on
    that sample (using the module-level ``eps``), and records the
    ``kmo.cost2`` of the result on the full data together with the
    precision/recall from ``km.kMPrecRecallVar2``.

    Args:
        synthD: iterable of items accepted by ``readSynthetic``.

    Returns:
        The value produced by chaining ``addAnswer`` over all settings,
        starting from an empty list.
    """
    file_no = 0
    stats = []
    for f in synthD:
        # Load one data set and cache its reference cost on the object.
        sd = readSynthetic(f)
        sd.phistar = compute_phi_star(sd)
        print("Iteration:", file_no)
        file_no += 1

        for extra in range(int(sd.k / 2)):
            print("TrueCost:", sd.phistar)
            sd.runphi = 1
            sd.runk = sd.k + extra
            precisions, recalls = [], []

            # Single repetition per setting.
            for _ in range(1):
                sample_size = 2 * (sd.k + sd.z)
                # Sample via kmeans++.  Alternatives the original author
                # left commented out: run on all of sd.data, or
                # ls.randomInit(sd.data, numpts) for a uniform sample.
                sample = kmpp.kmeanspp(sd.data, sample_size)

                ans, empz = ls.lsOut(sample, sd.runk, sd.z, eps)

                # Cost is evaluated on the full data, not on the sample.
                run_cost = kmo.cost2(sd.data, ans, int(sd.z))
                sd.costs.append(run_cost)

                prec, rec = km.kMPrecRecallVar2(sd, ans, empz)
                precisions.append(prec)
                recalls.append(rec)

            sd.precs = precisions
            sd.recs = recalls
            # Extra per-setting stats; per the original author's note, the
            # matching headers live at the top of the file.
            sd.extrastats = [
                sd.phistar,
                mean(np.array(precisions)),
                max(precisions),
                mean(np.array(recalls)),
                max(recalls),
            ]
            printSD(sd)
            stats = addAnswer(stats, sd)
            # Start the next setting with an empty cost list.
            sd.costs = []
    return stats
def computeKMPP(synthD):
    """Evaluate plain ``kmpp.kmeanspp`` seeding on each data set.

    For every data set in ``synthD`` the number of centers is swept over
    ``sd.k + j`` for ``j`` in ``range(int(sd.k / 2))``.  Each setting is
    repeated ten times; every repetition records the ``kmo.cost2`` of the
    seeding and the precision/recall from ``km.kMPrecRecall``.

    Args:
        synthD: iterable of items accepted by ``readSynthetic``.

    Returns:
        The value produced by chaining ``addAnswer`` over all settings,
        starting from an empty list.
    """
    file_no = 0
    stats = []
    for f in synthD:
        # Load one data set and cache its reference cost on the object.
        sd = readSynthetic(f)
        sd.phistar = compute_phi_star(sd)
        print("Iteration:", file_no)
        file_no += 1

        for extra in range(int(sd.k / 2)):
            print("TrueCost:", sd.phistar)
            sd.runphi = 1
            sd.runk = sd.k + extra
            precisions, recalls = [], []

            for _ in range(10):
                ans = kmpp.kmeanspp(sd.data, sd.runk)

                # Cost of this seeding on the full data.
                run_cost = kmo.cost2(sd.data, ans, int(sd.z))
                sd.costs.append(run_cost)

                prec, rec = km.kMPrecRecall(sd, ans)
                precisions.append(prec)
                recalls.append(rec)

            sd.precs = precisions
            sd.recs = recalls
            # Extra per-setting stats; per the original author's note, the
            # matching headers live at the top of the file.
            sd.extrastats = [
                sd.phistar,
                mean(np.array(precisions)),
                max(precisions),
                mean(np.array(recalls)),
                max(recalls),
            ]
            printSD(sd)
            stats = addAnswer(stats, sd)
            # Start the next setting with an empty cost list.
            sd.costs = []
    return stats
def computeKMLSCoreset(synthD):
    """Run ``ls.lsOutCor`` followed by ``lloyd.LloydOut``, with debug output.

    For every data set in ``synthD`` the number of centers is swept over
    ``sd.k + j`` for ``j`` in ``range(int(sd.k / 2))``.  Each setting runs
    ``ls.lsOutCor`` with ``2 * (sd.k + sd.z)`` points, prints diagnostics for
    that intermediate solution, refines it with ``lloyd.LloydOut`` and
    records the ``kmo.cost2`` of the refined centers plus the
    precision/recall returned by ``LloydOut``.

    This is an interactive debugging driver: it prints intermediate results
    and waits on ``input()`` after every run.

    Args:
        synthD: iterable of items accepted by ``readSynthetic``.

    Returns:
        The value produced by chaining ``addAnswer`` over all settings,
        starting from an empty list.
    """
    file_no = 0
    stats = []
    for f in synthD:
        # Load one data set and cache its reference cost on the object.
        sd = readSynthetic(f)
        sd.phistar = compute_phi_star(sd)
        print("Iteration:", file_no)
        file_no += 1

        for extra in range(int(sd.k / 2)):
            print("TrueCost:", sd.phistar)
            # Pairwise distances among the first k rows of the data.
            head = sd.data[0:sd.k]
            print(distance.cdist(head, sd.data[0:sd.k]))
            sd.runphi = 1
            sd.runk = sd.k + extra
            precisions, recalls = [], []

            # Single repetition per setting.
            for _ in range(1):
                numpts = 2 * (sd.k + sd.z)
                print("numpts", numpts)
                ans, empz = ls.lsOutCor(
                    sd.data, sd.runk, sd.z, eps, numpts, debug=True)

                # Diagnostics for the solution before refinement.
                cost_before = kmo.cost2(sd.data, ans, int(sd.z))
                print("------------\nBefore Lloyds:", cost_before, empz)
                print(find_inds(sd.data, ans))
                # NOTE(review): computeKMLS passes ``empz`` as the third
                # argument of kMPrecRecallVar2, here it is int(sd.z) --
                # confirm which one is intended.
                prec, rec = km.kMPrecRecallVar2(sd, ans, int(sd.z))
                print("Prec, rec", prec, rec)
                km.cr1(sd, ans)  # return value is not used

                # Indices k .. k+z-1, rebuilt for every run.
                # presumably the rows holding the planted outliers --
                # TODO confirm against the data generator.
                zind = list(range(sd.k, sd.k + sd.z))

                # Refinement; its precision/recall replace the values
                # computed above.
                ans, cid, wins, prec, rec, _ = lloyd.LloydOut(
                    sd.data, ans, sd.runk, sd.z, 1, 100, zind)

                cost_after = kmo.cost2(sd.data, ans, int(sd.z))
                sd.costs.append(cost_after)
                print("After Lloyds:", cost_after)
                print("Prec, rec", prec, rec)
                km.cr1(sd, ans)  # return value is not used

                # Pause until the user presses Enter on stdin.
                input()

                precisions.append(prec)
                recalls.append(rec)

            sd.precs = precisions
            sd.recs = recalls
            # Extra per-setting stats; per the original author's note, the
            # matching headers live at the top of the file.
            sd.extrastats = [
                sd.phistar,
                mean(np.array(precisions)),
                max(precisions),
                mean(np.array(recalls)),
                max(recalls),
            ]
            printSD(sd)
            stats = addAnswer(stats, sd)
            # Start the next setting with an empty cost list.
            sd.costs = []
    return stats
def compute_phi_star(sd):
    """Return the reference cost of a data set.

    Evaluates ``kmo.cost2`` on the rows of ``sd.data`` from index
    ``sd.k + sd.z`` onward, using the first ``sd.k`` rows as centers and
    ``int(sd.z)`` as the third argument.

    Args:
        sd: object exposing ``data``, ``k`` and ``z``.
            presumably ``data`` is laid out as k planted centers, then z
            planted outliers, then the regular points -- TODO confirm
            against the data generator.

    Returns:
        Whatever ``kmo.cost2`` returns for those arguments.
    """
    first_point = sd.k + sd.z
    points = sd.data[first_point:]
    centers = sd.data[:sd.k]
    return kmo.cost2(points, centers, int(sd.z))