def k_means_clustering(k, centroids, points):
    """Run k-means until the centroids stop changing and return them.

    Each pass assigns every point to the cluster whose index is returned by
    ``point.find_closest_point(centroids)``, then recomputes one centroid per
    cluster with ``Point.getAverage``. The loop stops when the set of
    recalculated centroids equals the set of current centroids.

    Args:
        k: Number of clusters to build.
        centroids: Initial centroids. They must be hashable, because
            convergence is tested with ``set`` equality, and indexable by
            cluster number (assumed to hold ``k`` entries -- confirm against
            callers).
        points: Points to cluster. Iterated once per pass, so it must be
            re-iterable (e.g. a list). Each point must provide
            ``find_closest_point(centroids)``, whose result is used as an
            index into the list of ``k`` clusters.

    Returns:
        The list of ``k`` recalculated centroids from the final pass.
    """
    iterations = 0
    while True:
        iterations += 1

        # One independent list per cluster. ``[[]] * k`` would repeat a
        # reference to a single list, so every point would end up in every
        # cluster and all centroids would collapse to the same average.
        clusters = [[] for _ in range(k)]
        for point in points:
            clusters[point.find_closest_point(centroids)].append(point)

        # Recalculate the centroid of each cluster.
        recalculated_centroids = []
        for index, cluster in enumerate(clusters):
            if cluster:
                recalculated_centroids.append(Point.getAverage(cluster))
            else:
                # No point picked this centroid in this pass; keep it as-is
                # instead of asking for the average of an empty list.
                recalculated_centroids.append(centroids[index])

        # Converged once the centroids no longer change between passes.
        if set(centroids) == set(recalculated_centroids):
            # Parenthesised single-argument form prints the same text under
            # both the Python 2 print statement and the Python 3 function.
            print("Clustering completed in " + str(iterations) + " iterations")
            return recalculated_centroids

        # Not converged: iterate again from a copy of the new centroids.
        centroids = copy.deepcopy(recalculated_centroids)