示例#1
0
                diff = 0
                for i in xrange(k):
                    diff += distance.euclidean(newCentroids[i], oldCentroids[i])
                    
                logger.debug( 'means total diff %f:' % diff)
                if diff < delta:
                    break
                else:
                    oldCentroids[:] = newCentroids
                    np.savetxt(centroidsinputfile, newCentroids)
                    
            
            logger.debug( 'total time: %f' %total)
    
            # Calculate the sum of squared errors (SSE)
            SSE = Utils.calcSSE(points, newCentroids)
            logger.info('%.f' % SSE)
            logger.debug( 'Sum of Squared Error: %s' % SSE)
            
        
            # Per data batch:
            # assign each point to its nearest centroid (index of the smallest Euclidean distance)
            distmatrix = distance.cdist(points, newCentroids, metric='euclidean')
            labels = distmatrix.argmin(axis=1)

        #Local
        #points = np.loadtxt(inputfile1)
#        Plot.plotPoints(points, labels, title='final kmeans2')
#        Plot.plotMeans(newCentroids)
#        strnow = datetime.now().strftime("%Y-%m-%d_%H%M%S")
#        plt.savefig('%s_%s.png' % (strnow,fileoutpostfix))