Пример #1
0
 def kmean(self,k,cutoff):
     """Group ``self.files`` into ``k`` clusters with an iterative k-means loop.

     ``k`` files are drawn at random as seeds and each seed becomes a
     single-file ``Cluster``.  Every pass assigns each file to the cluster
     whose centroid is closest according to ``utils.getDistance``, passes
     each cluster its new member list through ``addfiles`` and stops as
     soon as the largest value returned by ``addfiles`` during that pass
     (treated as the centroid shift) is below ``cutoff``.

     Returns the list of clusters, after recomputing the centroid and the
     radius of each one.
     """
     clusters = [Cluster(self.id, files=[seed])
                 for seed in random.sample(self.files, k)]

     while True:
         # One bucket of member files per cluster, rebuilt on every pass.
         buckets = [[] for _ in clusters]

         for item in self.files:
             # Start from the first cluster; a later one wins only when it
             # is strictly closer, so ties keep the earliest cluster.
             nearest = 0
             best = utils.getDistance(item.vector, clusters[0].centroid)
             for position, candidate in enumerate(clusters[1:], 1):
                 gap = utils.getDistance(item.vector, candidate.centroid)
                 if gap < best:
                     best, nearest = gap, position
             buckets[nearest].append(item)

         # Hand each cluster its bucket and track the largest shift seen.
         largest_shift = 0.0
         for cluster, members in zip(clusters, buckets):
             largest_shift = max(largest_shift, cluster.addfiles(members))

         # Converged once no cluster moved by at least ``cutoff``.
         if largest_shift < cutoff:
             break

     for cluster in clusters:
         cluster.centroid = cluster.calculateCentroid()
         cluster.calculateRadius()

     return clusters
Пример #2
0
    def calculateRadius(self):
        """Store and return the largest centroid-to-file distance.

        The distance from ``self.centroid`` to the ``vector`` of every entry
        in ``self.files`` is measured with ``utils.getDistance``.  The
        largest one is saved in ``self.radius`` and returned.  The search is
        seeded with -1, so -1 is the result when ``self.files`` is empty.
        """
        # Putting the -1 seed first means max() replaces it only with a
        # strictly larger distance.
        candidates = [-1]
        candidates.extend(
            utils.getDistance(self.centroid, member.vector)
            for member in self.files
        )
        farthest = max(candidates)
        self.radius = farthest
        return farthest
Пример #3
0
 def calculateRadius(self):
     """Compute the cluster radius, store it in ``self.radius`` and return it.

     The radius is the greatest ``utils.getDistance`` value between
     ``self.centroid`` and the ``vector`` of any entry in ``self.files``.
     The running value starts at -1, which is therefore the result when
     ``self.files`` is empty.
     """
     farthest = -1
     for member in self.files:
         # max(a, b) keeps ``a`` unless ``b`` is strictly greater.
         farthest = max(farthest, utils.getDistance(self.centroid, member.vector))

     self.radius = farthest
     return farthest
Пример #4
0
 def getScore(self,vector,centroid,clusterdata,alldata):
     """Score ``vector`` against a cluster as ``clusterdata / (distance * alldata)``.

     ``distance`` is ``utils.getDistance(vector, centroid)``.

     When the denominator is zero (the vector lies exactly on the centroid,
     or ``alldata`` is zero) the ratio is undefined.  Instead of raising
     ``ZeroDivisionError`` the method then returns ``float('inf')`` for a
     positive ``clusterdata`` and ``0.0`` otherwise.
     """
     dist = utils.getDistance(vector, centroid)
     denominator = dist * alldata
     if denominator == 0:
         # A zero distance is the best possible match, so report an
         # unbounded score unless there is no cluster data to weigh.
         return float('inf') if clusterdata > 0 else 0.0
     return clusterdata / denominator