def kmeans(points, k, cutoff):
    """Cluster latitude/longitude records into k groups with k-means.

    Args:
        points: Iterable of indexable records. ``p[1]`` and ``p[2]`` are
            converted with ``float`` and used as latitude and longitude;
            records for which that conversion fails are skipped.
        k: Number of clusters; that many parsed points are drawn at random
            to seed the initial single-point clusters.
        cutoff: Convergence threshold. Iteration stops once the largest
            centroid shift reported by ``Cluster.update`` during a pass is
            below this value (same units as ``dist_on_earth`` -- TODO
            confirm against that helper).

    Returns:
        A list with one ``[member_count, centroid_coords]`` entry per
        cluster.

    Raises:
        ValueError: From ``random.sample`` when ``k`` is negative or larger
            than the number of records that parsed successfully.

    Side effects:
        Prints every input record and the initial clusters to stdout.
    """
    # NOTE(review): the file appears to define kmeans a second time right
    # after this one; if both are module-level, the later definition wins.

    # Keep only the records whose columns 1 and 2 parse as floats.
    parsed = []
    for p in points:
        # Single-argument parenthesized print behaves the same on Python 2
        # (statement) and Python 3 (function).
        print(p)
        try:
            lat = float(p[1])
            lon = float(p[2])
        except (IndexError, KeyError, TypeError, ValueError):
            # Malformed record (too short, non-numeric, None, ...): skip it.
            # Anything else (KeyboardInterrupt, NameError, ...) propagates.
            continue
        parsed.append(Point([lat, lon]))
    points = parsed

    # Randomly sample k Points from the points list, build Clusters around them
    clusters = [Cluster([seed]) for seed in random.sample(points, k)]
    print(" clusters: %s" % clusters)

    while True:
        # One bucket of member points per cluster for this pass.
        lists = [[] for _ in clusters]

        for p in points:
            # Find the cluster whose centroid is nearest; ties keep the
            # lowest index because the comparison is strict.
            index = 0
            smallest_distance = dist_on_earth(
                p.coords, clusters[0].centroid.coords)
            for i, cluster in enumerate(clusters[1:], 1):
                distance = dist_on_earth(p.coords, cluster.centroid.coords)
                if distance < smallest_distance:
                    smallest_distance = distance
                    index = i
            lists[index].append(p)

        # Update each cluster with its bucket and record the biggest
        # centroid shift of the pass.
        # NOTE(review): a cluster can end a pass with no members; whether
        # Cluster.update copes with an empty list is not visible here --
        # confirm.
        biggest_shift = 0.0
        for cluster, members in zip(clusters, lists):
            shift = cluster.update(members)
            biggest_shift = max(biggest_shift, shift)

        # Converged once no centroid moved by at least the cutoff.
        if biggest_shift < cutoff:
            break

    # Return the list of cluster attributes
    return [[len(c.points), c.centroid.coords] for c in clusters]
def kmeans(points, k, cutoff):
    """Cluster latitude/longitude records into k groups with k-means.

    Args:
        points: Iterable of indexable records. ``p[1]`` and ``p[2]`` are
            converted with ``float`` and used as latitude and longitude;
            records for which that conversion fails are skipped.
        k: Number of clusters; that many parsed points are drawn at random
            to seed the initial single-point clusters.
        cutoff: Convergence threshold. Iteration stops once the largest
            centroid shift reported by ``Cluster.update`` during a pass is
            below this value (same units as ``dist_on_earth`` -- TODO
            confirm against that helper).

    Returns:
        A list with one ``[member_count, centroid_coords]`` entry per
        cluster.

    Raises:
        ValueError: From ``random.sample`` when ``k`` is negative or larger
            than the number of records that parsed successfully.

    Side effects:
        Prints every input record and the initial clusters to stdout.
    """
    # NOTE(review): the file appears to define kmeans once already right
    # before this definition; if both are module-level, this later one is
    # the definition that takes effect.

    # Keep only the records whose columns 1 and 2 parse as floats.
    parsed = []
    for p in points:
        # Single-argument parenthesized print behaves the same on Python 2
        # (statement) and Python 3 (function).
        print(p)
        try:
            lat = float(p[1])
            lon = float(p[2])
        except (IndexError, KeyError, TypeError, ValueError):
            # Malformed record (too short, non-numeric, None, ...): skip it.
            # Anything else (KeyboardInterrupt, NameError, ...) propagates.
            continue
        parsed.append(Point([lat, lon]))
    points = parsed

    # Randomly sample k Points from the points list, build Clusters around them
    clusters = [Cluster([seed]) for seed in random.sample(points, k)]
    print(" clusters: %s" % clusters)

    while True:
        # One bucket of member points per cluster for this pass.
        lists = [[] for _ in clusters]

        for p in points:
            # Find the cluster whose centroid is nearest; ties keep the
            # lowest index because the comparison is strict.
            index = 0
            smallest_distance = dist_on_earth(
                p.coords, clusters[0].centroid.coords)
            for i, cluster in enumerate(clusters[1:], 1):
                distance = dist_on_earth(p.coords, cluster.centroid.coords)
                if distance < smallest_distance:
                    smallest_distance = distance
                    index = i
            lists[index].append(p)

        # Update each cluster with its bucket and record the biggest
        # centroid shift of the pass.
        # NOTE(review): a cluster can end a pass with no members; whether
        # Cluster.update copes with an empty list is not visible here --
        # confirm.
        biggest_shift = 0.0
        for cluster, members in zip(clusters, lists):
            shift = cluster.update(members)
            biggest_shift = max(biggest_shift, shift)

        # Converged once no centroid moved by at least the cutoff.
        if biggest_shift < cutoff:
            break

    # Return the list of cluster attributes
    return [[len(c.points), c.centroid.coords] for c in clusters]
def update(self, points):
    """Swap in a new member list and report how far the centroid moved.

    Args:
        points: The points that now belong to this cluster; stored as
            ``self.points`` before the centroid is recomputed.

    Returns:
        The value of ``dist_on_earth`` between the previous centroid's
        coordinates and the freshly computed centroid's coordinates.
    """
    # Hold on to the outgoing centroid so the shift can be measured later.
    previous = self.centroid

    # Membership must be replaced first: calculateCentroid() is called
    # with no arguments, so it presumably reads self.points.
    self.points = points
    self.centroid = self.calculateCentroid()

    moved = dist_on_earth(previous.coords, self.centroid.coords)
    return moved