def kmeansTest(k=2, n=20, verbose=False):
    """Run k-means on two generated 2-D sample sets and plot the outcome.

    The random generator is seeded with 0 before the samples are drawn.
    Two sets of ``n`` samples are requested from ``util.genDistribution``:
    the first with means (3, 5), the second with means (6, 6), both with
    standard deviation 1 on each axis.  The combined samples are plotted
    as a single cluster, clustered with ``kmeans`` and plotted again, and
    every resulting cluster is printed.

    Parameters:
        k: number of clusters passed to ``kmeans``.
        n: number of samples requested per distribution.
        verbose: forwarded to ``kmeans`` and to the final plot call.
    """
    random.seed(0)

    # Centre and spread of the first distribution; the second one is the
    # same shape shifted by (+3, +1).
    x_mean, x_sd = 3, 1
    y_mean, y_sd = 5, 1

    # The last argument is presumably a name prefix for the generated
    # samples -- confirm against util.genDistribution.
    first_set = util.genDistribution(x_mean, x_sd, y_mean, y_sd, n, '1.')
    second_set = util.genDistribution(
        x_mean + 3, x_sd, y_mean + 1, y_sd, n, '2.'
    )
    combined = first_set + second_set

    print("before clustering")
    single_cluster = cluster.Cluster(combined)
    util.plot_cluster([single_cluster])

    print("after clustering")
    clusters = kmeans(combined, k, verbose)
    util.plot_cluster(clusters, verbose)

    print('Final result')
    for found in clusters:
        print('', found)
#### Implement the centroid updating function here!
# NOTE(review): `update` and `__str__` take `self` and are presumably methods
# of the Cluster class whose header lies outside this chunk -- confirm their
# indentation against the enclosing class when splicing this back in.
def update(self, samples):
    """Replace the samples held by the cluster with ``samples``.

    The actual work is delegated to ``helper.update``.

    Return: how much the centroid has changed
    """
    centroid_shift = helper.update(self, samples)
    return centroid_shift


def __str__(self):
    """Describe the cluster: its centroid features, then member names.

    Member names are listed in sorted order, separated by ", ".
    """
    member_names = sorted(member.getName() for member in self.samples)
    header = ('Cluster with centroid '
              + str(self.centroid.getFeatures())
              + ' contains:\n ')
    listing = ''.join(name + ', ' for name in member_names)
    # Drop the last two characters (the trailing ", " when there are members).
    return (header + listing)[:-2]


if __name__ == "__main__":
    # Build a cluster from the default distribution and show where it sits.
    initial_samples = util.genDistribution()
    demo_cluster = Cluster(initial_samples)
    print(demo_cluster.centroid)
    print("cluster center: ", demo_cluster.centroid.features)
    util.plot_cluster([demo_cluster])

    # now assign the cluster new samples, and move it
    replacement_samples = util.genDistribution(1, 1, 1, 1, 30)
    shift = demo_cluster.update(replacement_samples)
    print("center moved: ", shift)

    # plot_cluster expects an array of cluster...
    util.plot_cluster([demo_cluster])
# NOTE(review): the next two statements continue a construct (most likely a
# loop that builds `tmp_data`) which starts before this chunk -- confirm
# their indentation against the preceding code.
all_cluster.append(cl.Cluster(tmp_data))
data += tmp_data


def onclick(event):
    """Classify the clicked location with kNN and draw it on the plot.

    A new sample is created at the click position, passed to ``knn``
    together with the existing ``data`` and ``K``, appended to ``data``,
    and drawn with the marker and colour that match its label's position
    in ``LABELS``.

    Parameters:
        event: the Matplotlib mouse event delivered for
            ``button_press_event``.

    Clicks outside the axes are ignored.
    """
    # Matplotlib reports xdata/ydata as None when the click lands outside
    # the axes; there is no data-space point to classify in that case.
    if event.xdata is None or event.ydata is None:
        return

    # Creating a new point and finding the k nearest neighbours
    new = sample.Sample('', [event.xdata, event.ydata], '')
    # knn presumably assigns the predicted label to `new` in place, since the
    # label is read back below -- confirm against knn's definition.
    knn(new, data, K)

    # draw the new point
    data.append(new)
    label = new.getLabel()
    style_index = LABELS.index(label)
    features = new.getFeatures()
    pylab.scatter(
        [features[0]],
        [features[1]],
        label=label,
        marker=util.make_cmarkers()[style_index],
        color=util.make_cmap()[style_index],
    )
    pylab.draw()


# start plotting
fig = pylab.figure()
cid = fig.canvas.mpl_connect('button_press_event', onclick)
util.plot_cluster(all_cluster, centroid=False)
pylab.show()

# new_pt = sample.Sample('', [0.2, 0.3], '')
# knn(new_pt, data, K)
#
# data.append(new_pt)
# print("\nafter....")
# util.plotSamples(data)
# plt.show()
# NOTE(review): this `pass` follows code that lies before this chunk; it is
# kept untouched in case it is the only statement of a preceding suite.
pass

# Fill the array allSamples to hold the samples; each sample takes two
# attributes of an iris instance (columns 1 and 3, 0-based, of each
# comma-separated record).
for line in data:
    record = line.strip()
    if not record:
        # Skip blank lines (e.g. a trailing empty line at the end of the
        # data file) instead of failing on the column lookups below.
        continue
    content = record.split(",")
    d = sample.Sample('', [float(content[1]), float(content[3])])
    allSamples.append(d)
# ---- end of your code --- #

verbose = False
k = 3

print("before clustering")
# Show every sample as one single cluster before k-means runs.
unclustered = [kmeans.Cluster(allSamples)]
util.plot_cluster(unclustered)

print("after clustering")
# IMPLEMENTATION: apply k means to cluster the samples
# ---- start your code ---- #
# Use the configured k rather than a repeated literal so the two cannot
# drift apart.
clusters = kmeans.kmeans(allSamples, k, verbose)
# ---- end of your code --- #
util.plot_cluster(clusters, verbose)

# bonus
# NOTE: in the code visible here this is only an alias of allSamples; no
# normalization has been applied yet.
normalized_allSamples = allSamples
print("after normalizing")