# Tail of the k-means driver: convergence check followed by final reporting.
#
# NOTE(review): this chunk has lost its original line breaks and indentation;
# the statements below are all on one physical line. The `break` implies an
# enclosing iteration loop whose header is not visible here, so which of the
# following statements sit inside that loop (in particular the np.savetxt
# call) cannot be determined from this fragment -- confirm against the
# original file before restoring the layout. As written, everything after the
# first `#` on the line is parsed as a comment.
#
# What the statements do, in order:
#   * Accumulate into `diff` the Euclidean distance between newCentroids[i]
#     and oldCentroids[i] for i in xrange(k)  (xrange => Python 2 code).
#   * Log the total movement at DEBUG level.
#   * If diff < delta, `break`; otherwise copy newCentroids into oldCentroids
#     in place via slice assignment (oldCentroids[:] = ...).
#   * Write newCentroids to `centroidsinputfile` with np.savetxt
#     (presumably numpy -- the import is not visible here).
#   * Log `total` at DEBUG as 'total time'; `total` is defined outside this
#     fragment.
#   * Compute SSE with Utils.calcSSE(points, newCentroids) and log it at INFO
#     using '%.f' (a float format with zero decimal places) and again at DEBUG.
#   * Build the points-by-centroids Euclidean distance matrix with
#     distance.cdist (presumably scipy.spatial.distance -- TODO confirm) and
#     take argmin over axis=1, giving for each row of `points` the index of
#     its closest centroid in `labels`.
#   * The trailing Plot.* / plt.savefig calls are commented-out plotting code.
diff = 0 for i in xrange(k): diff += distance.euclidean(newCentroids[i], oldCentroids[i]) logger.debug( 'means total diff %f:' % diff) if diff < delta: break else: oldCentroids[:] = newCentroids np.savetxt(centroidsinputfile, newCentroids) logger.debug( 'total time: %f' %total) #Calculate SSE SSE = Utils.calcSSE(points, newCentroids) logger.info('%.f' % SSE) logger.debug( 'Sum of Squared Error: %s' % SSE) #per data batch #finds the distance to nearest cluster distmatrix = distance.cdist(points, newCentroids, metric='euclidean') labels = distmatrix.argmin(axis=1) #Local #points = np.loadtxt(inputfile1) # Plot.plotPoints(points, labels, title='final kmeans2') # Plot.plotMeans(newCentroids) # strnow = datetime.now().strftime("%Y-%m-%d_%H%M%S") # plt.savefig('%s_%s.png' % (strnow,fileoutpostfix))