Python Plot.subplotClusters示例

编程语言: Python

命名空间/包名称: util.utilities

类/类型: Plot

方法/功能: subplotClusters

hotexamples.com的示例: 2

Python Plot.subplotClusters - 共找到 2 个示例。以下是从开源项目中提取的、评价最高的 util.utilities.Plot.subplotClusters 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示隐藏

subplotClusters(2)

plotIntermediateClusters(1)

示例#1

显示文件

文件： kmeans.py 项目： sigurdurm/k-means

    def run(self, data, means, numberOfClusters, threshold, maxiterations):
        """Iterate k-means until the centroids settle or the pass limit is hit.

        Every pass delegates the assignment/update step to
        ``Utils.calcNewMeans``, prints the elapsed time, the summed Euclidean
        shift of the centroids and the SSE from ``Utils.calcSSE``, and then
        overwrites ``means`` in place with the new centroids.

        Parameters:
            data: points to cluster, handed unchanged to the ``Utils`` helpers.
                Assumed to be an (n_points, n_features) numeric array or
                nested sequence -- TODO confirm against callers.
            means: initial centroids, one row per cluster. Modified IN PLACE
                through slice assignment and also returned.
            numberOfClusters: number of centroids; used to size the counters
                and to index ``means``.
            threshold: the loop stops as soon as the summed centroid shift of
                a pass is strictly below this value.
            maxiterations: upper bound on the number of passes; zero or a
                negative value skips the loop entirely.

        Returns:
            A ``(means, labels, SSE)`` tuple: the (mutated) centroids, the
            index of the nearest final centroid for every point, and the SSE
            of the last pass (0 when no pass ran).
        """
        pointsInClusters = np.zeros(numberOfClusters)
        SSE = 0
        labels = np.zeros(len(data), dtype=int)
        iteration = 0
        total = 0
        # Defined up front so the summary below never hits an unbound name
        # when the loop body does not execute.
        meansDiff = None
        converged = False

        while iteration < maxiterations:
            start = time()
            iteration += 1
            print("Iteration %d" % iteration)

            # Presumably returns the updated centroids and the number of
            # points assigned to each one -- verify against Utils.
            meansNew, pointsInClusters = Utils.calcNewMeans(data, means)

            # Only the centroid update is timed, not the diagnostics below.
            end = time()
            print('time: %f' % (end - start))
            total += (end - start)

            # Total distance the centroids moved during this pass.
            meansDiff = 0
            for i in range(numberOfClusters):
                pprint('%s %s' % (i, meansNew[i]))
                meansDiff += distance.euclidean(meansNew[i], means[i])

            print('Means difference: %f' % meansDiff)

            # Within-cluster variation: sum of squared distances between the
            # points and their centroid.
            SSE = Utils.calcSSE(data, meansNew)
            print("SSE: %0.3f" % SSE)

            # Slice assignment keeps the caller's array object up to date.
            means[:] = meansNew
            if meansDiff < threshold:
                converged = True
                break

        # Report the real stop reason: convergence on the very last allowed
        # pass is still convergence, not an exhausted iteration budget.
        if converged:
            print("Means difference: %f is under threshold %f" % (meansDiff, threshold))
        else:
            print("Max iterations reached: %d" % iteration)

        for i in range(numberOfClusters):
            print("Cluster %d, number of points %d" % (i, pointsInClusters[i]))

        # The update step above does not hand back per-point assignments, so
        # derive them here from the final centroids; otherwise every point
        # would be returned (and plotted) as belonging to cluster 0.
        points = np.asarray(data, dtype=float)
        centroids = np.asarray(means, dtype=float)
        if points.ndim == 2 and centroids.ndim == 2 and len(points) and len(centroids):
            # One row of squared distances per centroid -> (n_clusters, n_points).
            squaredDistances = np.array(
                [np.sum((points - centroid) ** 2, axis=1) for centroid in centroids])
            labels = squaredDistances.argmin(axis=0)

        if self.showsubplots:
            Plot.subplotClusters(data, labels, means, iteration, title='final means')
            pylab.show()

        print('total time: %f' % total)

        return means, labels, SSE

示例#2

显示文件

文件： kmeans.py 项目： sigurdurm/k-means

 def doPlots(self, data, labels, means, iteration, title):
     """Draw the current clustering with whichever plot style is enabled.

     When ``self.showsubplots`` is truthy the call is forwarded to
     ``Plot.subplotClusters`` (which also receives ``iteration``);
     otherwise ``Plot.plotIntermediateClusters`` is used and ``iteration``
     is not passed on.
     """
     if self.showsubplots:
         Plot.subplotClusters(data, labels, means, iteration, title)
         return
     Plot.plotIntermediateClusters(data, labels, means, title)