def __init__(self, n_cluster: int, data: np.ndarray, use_kmeans: bool = False,
             w: float = 0.9, c1: float = 0.5, c2: float = 0.3,
             flag: int = 1, weights: list = None):
    """Initialise centroids, personal-best bookkeeping and velocity.

    Args:
        n_cluster: Number of centroids to hold.
        data: Samples; indexed by row to pick the starting centroids and
            passed to the scoring helpers.
        use_kmeans: When true, the random start is replaced by the
            ``centroid`` attribute of a ``KMeans(n_cluster=n_cluster,
            init_pp=False)`` fitted on ``data``.
        w: Stored as ``self._w``.
        c1: Stored as ``self._c1``.
        c2: Stored as ``self._c2``.
            NOTE(review): w/c1/c2 are presumably the PSO inertia, cognitive
            and social coefficients - confirm against the velocity update.
        flag: Selects the SSE helper: odd -> ``calc_sse``, even ->
            ``calc_sse2`` (which additionally receives ``weights``).
        weights: Forwarded to ``calc_sse2`` only; ignored when ``flag`` is
            odd. It is not stored on the instance.
    """
    # Rows are drawn with replacement (np.random.choice default), so the same
    # sample can be picked twice. The draw happens even when use_kmeans is
    # set, which keeps the global RNG stream identical in both modes.
    index = np.random.choice(len(data), n_cluster)
    self.centroids = data[index].copy()

    if use_kmeans:
        kmeans = KMeans(n_cluster=n_cluster, init_pp=False)
        kmeans.fit(data)
        self.centroids = kmeans.centroid.copy()

    self.best_position = self.centroids.copy()

    # The centroids do not change between the two scoring calls, so the
    # assignment is computed once and shared.
    assignments = self._predict(data)
    self.best_score = quantization_error(self.centroids, assignments, data)

    self.flag = flag
    if self.flag % 2 == 1:
        self.best_sse = calc_sse(self.centroids, assignments, data)
    else:
        self.best_sse = calc_sse2(self.centroids, assignments, data, weights)

    self.velocity = np.zeros_like(self.centroids)
    self._w = w
    self._c1 = c1
    self._c2 = c2
def _update_centroids(self, data: np.ndarray):
    """Move the centroids by the current velocity and refresh the bests.

    ``self.centroids`` is replaced by ``self.centroids + self.velocity``.
    The new position is then scored with ``quantization_error`` and
    ``calc_sse``:

    * ``best_sse`` becomes the minimum of its old value and the new SSE;
    * ``best_score`` / ``best_position`` are replaced only when the new
      quantization error is strictly lower than ``best_score``.

    The two bests are tracked independently, so ``best_sse`` is not
    necessarily the SSE of ``best_position``.

    Args:
        data: Samples passed to ``self._predict`` and the scoring helpers.
    """
    self.centroids = self.centroids + self.velocity

    # One assignment pass serves both metrics; the centroids are not touched
    # between the two scoring calls.
    assignments = self._predict(data)
    new_score = quantization_error(self.centroids, assignments, data)

    # NOTE(review): this always uses calc_sse. If the owning class seeds
    # best_sse with a different helper (e.g. a weighted variant), min() is
    # comparing unlike quantities - confirm which constructor pairs with
    # this method.
    sse = calc_sse(self.centroids, assignments, data)
    self.best_sse = min(sse, self.best_sse)

    if new_score < self.best_score:
        self.best_score = new_score
        self.best_position = self.centroids.copy()
def __init__(self, n_cluster: int, data: np.ndarray, use_kmeans: bool = False,
             w: int = 9, c1: int = 5, c2: int = 3):
    """Initialise centroids, personal-best bookkeeping and velocity.

    Args:
        n_cluster: Number of centroids to hold.
        data: Samples; indexed by row to pick the starting centroids and
            passed to the scoring helpers.
        use_kmeans: When true, the random start is replaced by the
            ``centroid`` attribute of a ``KMeans(n_cluster=n_cluster,
            init_pp=False)`` fitted on ``data``.
        w: Stored as ``self._w``.
        c1: Stored as ``self._c1``.
        c2: Stored as ``self._c2``.
            NOTE(review): w/c1/c2 are presumably PSO coefficients; the
            integer defaults (9, 5, 3) may be intended to be scaled
            elsewhere - confirm against the velocity update.
    """
    # Rows are drawn with replacement (np.random.choice default), so the same
    # sample can be picked twice. The draw happens even when use_kmeans is
    # set, which keeps the global RNG stream identical in both modes.
    index = np.random.choice(len(data), n_cluster)
    self.centroids = data[index].copy()

    if use_kmeans:
        kmeans = KMeans(n_cluster=n_cluster, init_pp=False)
        kmeans.fit(data)
        self.centroids = kmeans.centroid.copy()

    self.best_position = self.centroids.copy()

    # The centroids do not change between the two scoring calls, so the
    # assignment is computed once and shared.
    assignments = self._predict(data)
    self.best_score = quantization_error(self.centroids, assignments, data)
    self.best_sse = calc_sse(self.centroids, assignments, data)

    self.velocity = np.zeros_like(self.centroids)
    self._w = w
    self._c1 = c1
    self._c2 = c2
def kmeans_custom(k, data, stdev_mean, max_iterations=50):
    """Run k-means from custom initial clusters and print the best centroids.

    Clusters come from ``init_clusters_custom(k, data)``. Each pass
    recomputes the centroids, reassigns the points, and scores the result
    with ``kmeans.calc_sse``; a deep copy of the lowest-SSE clustering seen
    is kept. The loop stops early as soon as ``kmeans.reassign_clusters``
    returns ``False``. Finally ``"k = <k>"``, the centroids of the best
    clustering and an empty line are printed. Nothing is returned.

    Args:
        k: Number of clusters; forwarded to ``init_clusters_custom`` and
            echoed in the output.
        data: Examples to cluster; forwarded to ``init_clusters_custom``.
        stdev_mean: Forwarded unchanged to
            ``kmeans.print_cluster_centroids``.
        max_iterations: Upper bound on the number of passes. Defaults to 50,
            the count the original comment documented (the old loop bound
            stopped one pass short).
    """
    clusters = init_clusters_custom(k, data)
    best_sse = float("inf")
    best_clusters = None

    for _ in range(max_iterations):
        kmeans.calc_cluster_centroids(clusters)
        # Kept as `== False` on purpose: a None return must NOT end the loop,
        # whereas `not ...` would treat None as a stop signal.
        if kmeans.reassign_clusters(clusters) == False:  # noqa: E712
            break
        sse = kmeans.calc_sse(clusters)
        if sse < best_sse:
            best_clusters = copy.deepcopy(clusters)
            best_sse = sse

    # If the loop ended before any pass was scored (e.g. the very first
    # reassignment reported False), report the current clusters instead of
    # handing an empty list to the printer.
    if best_clusters is None:
        best_clusters = clusters

    # Single-argument parenthesised print emits the same text on Python 2
    # and Python 3.
    print("k = " + str(k))
    kmeans.print_cluster_centroids(best_clusters, stdev_mean)
    print("")