def kmeans(samples, k, verbose):
    """Partition samples into k clusters using k-means.

    samples: list of samples of class Sample
    k: positive int, the number of clusters to build
    verbose: Boolean; when True, all clusters are printed after every
        iteration

    Returns a list containing k clusters.
    """
    # Pick k random samples as the starting centroids and wrap each one
    # in its own single-member cluster.
    seeds = random.sample(samples, k)
    clusters = [cluster.Cluster([seed]) for seed in seeds]

    # Keep running the update step until it reports convergence
    # (i.e. the centroids no longer change).
    iteration = 0
    while True:
        iteration += 1
        # The update step currently comes from the helper module; it is meant
        # to be replaced by a kmeans_iter(samples, clusters, k) implemented
        # in this file:
        #     done = kmeans_iter(samples, clusters, k)
        # NOTE(review): presumably kmeans_iter updates `clusters` in place,
        # since only its return flag is used here - confirm.
        done = helper.kmeans_iter(samples, clusters, k)
        if verbose:
            print('Iteration #' + str(iteration))
            for c in clusters:
                print(c)
            print('\n')  # add blank line
        if done:
            break
    return clusters
def kmeansTest(k=2, n=20, verbose=False):
    """Demo run: generate two labelled sample sets, plot them, cluster, plot again.

    k: number of clusters requested from kmeans
    n: forwarded to util.genDistribution for each set (presumably the number
        of samples per set)
    verbose: forwarded to kmeans and to util.plot_cluster
    """
    random.seed(0)  # seed the RNG with a fixed value before generating data

    # Parameters of the first set; the second set reuses the same deviations
    # with its x mean shifted by 3 and its y mean shifted by 1.
    x_mean, x_sd = 3, 1
    y_mean, y_sd = 5, 1
    first_set = util.genDistribution(x_mean, x_sd, y_mean, y_sd, n, '1.')
    second_set = util.genDistribution(x_mean + 3, x_sd, y_mean + 1, y_sd, n, '2.')
    pooled = first_set + second_set

    # Show everything as one big cluster first, for comparison.
    print("before clustering")
    util.plot_cluster([cluster.Cluster(pooled)])

    print("after clustering")
    result = kmeans(pooled, k, verbose)
    util.plot_cluster(result, verbose)

    print('Final result')
    for found in result:
        print('', found)
# 's', 'p', '*', 'h', 'H', 'D', 'd')
# colors = ('b', 'g', 'c', 'm', 'y', 'k')
# return [c + m for m in markers for c in colors]


def plot_cluster(clusters, verbose=False, centroid=True):
    """Plot every cluster in its own color/marker style, then show the figure.

    clusters: sequence of clusters; each must provide getMembers() and a
        `centroid` attribute
    verbose: forwarded to plotSamples when drawing the cluster members
    centroid: when True, each cluster's centroid is also drawn with the
        style string 'sr'
    """
    shapes = make_cmarkers()
    palette = make_cmap()
    # NOTE(review): the i-th cluster uses the i-th color and the i-th marker,
    # so this assumes both sequences have at least len(clusters) entries.
    for position, group in enumerate(clusters):
        style = palette[position] + shapes[position]
        plotSamples(group.getMembers(), style, verbose)
        if centroid:
            plotSamples([group.centroid], 'sr')
    plt.show()


if __name__ == "__main__":
    # Earlier manual checks, kept for reference:
    # print(minkowskiDist([0, 0], [1, 1], 1))
    # print(minkowskiDist([0, 0], [1, 1], 2))

    # Quick visual check: generate samples with the default parameters and
    # plot them as a single cluster.
    test_samples = genDistribution()
    c = cluster.Cluster(test_samples)
    plot_cluster([c])

    # plotSamples(test_samples, 'o')
    # plotSamples([test_samples[0]], 'sk')
    # plt.show()
# make data
random.seed(0)  # fixed seed so the generated data is reproducible
n = 100  # NOTE(review): not used in the visible code; genDistribution gets n=20
K = 3
LABELS = ('a', 'b', 'c')
all_cluster = []
data = []
for i in range(K):
    # One labelled group per entry of LABELS; the first four arguments are
    # presumably x mean, x SD, y mean, y SD - confirm against util.
    tmp_data = util.genDistribution(i * 2 + 1, 1, i * 2 + 1, 1,
                                    n=20, label=LABELS[i])
    all_cluster.append(cl.Cluster(tmp_data))
    data += tmp_data


def onclick(event):
    """Mouse-click handler: classify the clicked point with knn and draw it.

    event: the click event; its xdata/ydata give the click position in data
        coordinates.

    Clicks outside the plotting area are ignored. Otherwise a new Sample is
    created at the click position, passed to knn together with the existing
    data, appended to `data`, and drawn with the marker and color that
    belong to its label.
    """
    # Matplotlib sets xdata/ydata to None when the click does not land inside
    # an Axes; there is no point to classify in that case.
    if event.xdata is None or event.ydata is None:
        return

    # Creating a new point and finding the k nearest neighbours
    # NOTE(review): knn presumably assigns the predicted label to `new`,
    # since the label is looked up in LABELS right below - confirm.
    new = sample.Sample('', [event.xdata, event.ydata], '')
    knn(new, data, K)

    # draw the new point
    data.append(new)
    label = new.getLabel()
    features = new.getFeatures()
    style_index = LABELS.index(label)
    pylab.scatter([features[0]],
                  [features[1]],
                  label=label,
                  marker=util.make_cmarkers()[style_index],
                  color=util.make_cmap()[style_index])
    pylab.draw()