def _get_neighbors(self, sample_i): neighbors = [] for _sample_i, _sample in enumerate(self.X): if _sample_i != sample_i and euclidean_distance( self.X[sample_i], _sample) < self.eps: neighbors.append(_sample_i) return np.array(neighbors)
def _closest_centroid(self, sample, centroids): closest_i = None closest_distance = float("inf") for i, centroid in enumerate(centroids): distance = euclidean_distance(sample, centroid) if distance < closest_distance: closest_i = i closest_distance = distance return closest_i
def _calculate_cost(self, X, clusters, medoids): cost = 0 # For each cluster for i, cluster in enumerate(clusters): medoid = medoids[i] for sample_i in cluster: # Add distance between sample and medoid as cost cost += euclidean_distance(X[sample_i], medoid) return cost
def predict(self, X_test, X_train, y_train):
    """Predict a label for each test sample from its nearest training samples.

    For every sample in ``X_test`` the distance to every training sample
    is computed with ``euclidean_distance``, the ``self.k`` closest
    ``[distance, label]`` rows are selected, and ``self._get_vote``
    (defined elsewhere) turns them into the predicted label.

    Args:
        X_test: Samples to classify.
        X_train: Training samples.
        y_train: Labels, where ``y_train[j]`` belongs to ``X_train[j]``.

    Returns:
        A NumPy array with one prediction per test sample.
    """
    classes = np.unique(y_train)
    predictions = []
    for query in X_test:
        # NOTE(review): distance and label share one array, so labels are
        # coerced to the array's common dtype - confirm labels are numeric.
        scored = np.array([
            [euclidean_distance(query, train_point), y_train[idx]]
            for idx, train_point in enumerate(X_train)
        ])
        # Order rows by ascending distance and keep the first k.
        by_distance = scored[:, 0].argsort()
        nearest = scored[by_distance][:self.k]
        predictions.append(self._get_vote(nearest, classes))
    return np.array(predictions)
def predict(self, X_test, X_train, y_train):
    """Predict a label for each test sample from its nearest training samples.

    Every test sample is compared with every training sample using
    ``euclidean_distance``; the ``self.k`` rows with the smallest
    distances are handed to ``self._majority_vote`` (defined elsewhere)
    together with the unique training labels.

    Args:
        X_test: Samples to classify.
        X_train: Training samples.
        y_train: Labels, where ``y_train[i]`` belongs to ``X_train[i]``.

    Returns:
        A NumPy array with one prediction per test sample.
    """
    classes = np.unique(y_train)
    predictions = []
    for query in X_test:
        # Build the matrix of [Euclidean distance, label] rows.
        # NOTE(review): both columns share one dtype, so labels are
        # coerced alongside the distances - confirm labels are numeric.
        pairs = np.array([
            [euclidean_distance(query, train_sample), y_train[row]]
            for row, train_sample in enumerate(X_train)
        ])
        # Keep the k rows with the smallest Euclidean distance.
        nearest = pairs[pairs[:, 0].argsort()][:self.k]
        # Let the vote over those rows decide the predicted class.
        predictions.append(self._majority_vote(nearest, classes))
    return np.array(predictions)
def predict(self, X_test, X_train, y_train):
    """Predict a label for each test sample from its nearest training samples.

    For each sample in ``X_test`` the distance to every observed sample
    in ``X_train`` is measured with ``euclidean_distance``. The
    ``self.k`` nearest ``[distance, label]`` rows are then passed to
    ``self._get_vote`` (defined elsewhere) along with the unique labels.

    Args:
        X_test: Samples to classify.
        X_train: Observed (training) samples.
        y_train: Labels, where ``y_train[j]`` belongs to ``X_train[j]``.

    Returns:
        A NumPy array with one prediction per test sample.
    """
    classes = np.unique(y_train)
    predictions = []
    for query in X_test:
        # Pair the distance from each observed sample to the query with
        # that sample's label.
        # NOTE(review): one array holds both columns, so labels are
        # coerced to the common dtype - confirm labels are numeric.
        table = np.array([
            [euclidean_distance(query, observed), y_train[j]]
            for j, observed in enumerate(X_train)
        ])
        # Rank rows from lowest to highest distance; slicing the ranking
        # first selects the same k rows as slicing the sorted table.
        ranking = np.argsort(table[:, 0])
        k_nearest = table[ranking[:self.k]]
        # The vote among the k neighbors sets the prediction.
        predictions.append(self._get_vote(k_nearest, classes))
    return np.array(predictions)
def predict(self, X_test, X_train, y_train):
    """Predict a label for each test sample from its nearest training samples.

    For each sample in ``X_test`` the distance to every observed sample
    in ``X_train`` is measured with ``euclidean_distance``. The
    ``self.k`` nearest ``[distance, label]`` rows are then passed to
    ``self._majority_vote`` (defined elsewhere) along with the unique
    labels.

    Args:
        X_test: Samples to classify.
        X_train: Observed (training) samples.
        y_train: Labels, where ``y_train[j]`` belongs to ``X_train[j]``.

    Returns:
        A NumPy array with one prediction per test sample.
    """
    classes = np.unique(y_train)

    def nearest_rows(query):
        # Rows of [distance, label] for every observed sample.
        # NOTE(review): one array holds both columns, so labels are
        # coerced to the common dtype - confirm labels are numeric.
        rows = np.array([
            [euclidean_distance(query, observed), y_train[j]]
            for j, observed in enumerate(X_train)
        ])
        # Sort from lowest to highest distance and keep the first k.
        ascending = rows[:, 0].argsort()
        return rows[ascending][:self.k]

    predictions = []
    for test_sample in X_test:
        # The vote among the k neighbors sets the prediction.
        winner = self._majority_vote(nearest_rows(test_sample), classes)
        predictions.append(winner)
    return np.array(predictions)