def _get_neighbors(self, sample_i):
    """ Return the indexes of the samples neighboring sample ``sample_i``

    Another sample in ``self.X`` counts as a neighbor when its distance to
    ``self.X[sample_i]`` (as computed by ``euclidean_distance``) is strictly
    smaller than ``self.eps``. The sample itself is never included.

    Returns a 1-D integer ``np.ndarray`` of indexes into ``self.X``; it is
    empty when no other sample lies within ``self.eps``.
    """
    samples = self.X
    neighbors = [
        other_i
        for other_i, other in enumerate(samples)
        if other_i != sample_i
        and euclidean_distance(samples[sample_i], other) < self.eps
    ]
    # An empty list would otherwise become a float64 array, which cannot be
    # used for indexing; force an integer dtype so the result is always a
    # valid index array.
    return np.array(neighbors, dtype=int)
def _calculate_cost(self, X, clusters, medoids):
    """ Return the total distance between every sample and its medoid

    ``clusters[i]`` holds the indexes (into ``X``) of the samples assigned to
    ``medoids[i]``. The cost is the sum, over all clusters, of
    ``euclidean_distance`` between each member sample and that medoid.
    """
    def member_distances():
        # Yield sample-to-medoid distances cluster by cluster
        for cluster_i, members in enumerate(clusters):
            center = medoids[cluster_i]
            for member_i in members:
                yield euclidean_distance(X[member_i], center)

    total = 0
    # Accumulate one distance at a time, in cluster order
    for dist in member_distances():
        total += dist
    return total
def _closest_medoid(self, sample, medoids):
    """ Return the index of the medoid closest to the sample

    Distances are computed with ``euclidean_distance``. When several medoids
    are equally close the earliest one wins. Returns ``None`` if ``medoids``
    is empty or no distance is smaller than infinity.
    """
    best_index, best_distance = None, float("inf")
    for index, medoid in enumerate(medoids):
        candidate = euclidean_distance(sample, medoid)
        # Skip anything that does not strictly improve on the best so far
        if not candidate < best_distance:
            continue
        best_index, best_distance = index, candidate
    return best_index
def _closest_centroid(self, sample, centroids):
    """ Return the index of the centroid nearest to the sample

    Each centroid is compared using ``euclidean_distance``; only a strictly
    smaller distance replaces the current best, so the first of several
    equally near centroids is kept. Returns ``None`` when ``centroids`` is
    empty or no distance is smaller than infinity.
    """
    nearest_index = None
    shortest = float("inf")
    for index, centroid in enumerate(centroids):
        gap = euclidean_distance(sample, centroid)
        if gap < shortest:
            # New best: remember both where it is and how far away
            nearest_index, shortest = index, gap
    return nearest_index
def _get_neighbors(self, sample_i):
    """ Return an array of indexes of neighboring samples

    A sample_2 is considered a neighbor of sample_1 if the distance between
    them (as computed by ``euclidean_distance``) is strictly smaller than
    epsilon (``self.eps``). A sample is never its own neighbor.

    The result is a 1-D integer ``np.ndarray`` of indexes into ``self.X``,
    in ascending order, and is empty when the sample has no neighbors.
    """
    neighbors = []
    for candidate_i, candidate in enumerate(self.X):
        if candidate_i == sample_i:
            # A sample is not a neighbor of itself
            continue
        if euclidean_distance(self.X[sample_i], candidate) < self.eps:
            neighbors.append(candidate_i)
    # Without an explicit dtype an empty list turns into a float64 array,
    # which is not a valid index array; always return integers.
    return np.array(neighbors, dtype=int)
def predict(self, X_test, X_train, y_train):
    """ Predict a label for every sample in X_test by k-nearest-neighbor vote

    For each test sample the distance to every sample in ``X_train`` is
    computed with ``euclidean_distance``. The ``self.k`` closest training
    samples are then passed, as rows of ``[distance, label]``, to
    ``self._majority_vote`` together with the unique labels of ``y_train``.

    Returns an ``np.ndarray`` with one predicted label per test sample, in
    the order of ``X_test``.
    """
    classes = np.unique(y_train)
    y_pred = []
    # Determine the class of each sample
    for test_sample in X_test:
        # Distance from each observed sample to the sample we wish to predict
        distances = [
            euclidean_distance(test_sample, observed_sample)
            for observed_sample in X_train
        ]
        # Neighbor information as [distance, label] rows
        neighbors = np.array(
            [[distance, y_train[j]] for j, distance in enumerate(distances)]
        )
        # Rank on the numeric distances rather than on column 0 of
        # `neighbors`: with non-numeric labels NumPy coerces the whole array
        # to strings, and sorting that column would order the distances
        # lexicographically ("10.0" < "9.0") instead of numerically.
        order = np.asarray(distances, dtype=float).argsort()
        # Select the k observed samples with the lowest distance
        k_nearest_neighbors = neighbors[order[:self.k]]
        # Do a majority vote among the k neighbors and set the prediction to
        # the class receiving the most votes
        label = self._majority_vote(k_nearest_neighbors, classes)
        y_pred.append(label)
    return np.array(y_pred)