def refresh_cluster(self):
    """Reassign every point in ``self.dataset`` to its nearest centroid.

    Rebuilds ``self.clusters`` so that ``self.clusters[k]`` is the index of
    the centroid with the smallest Euclidean distance to
    ``self.dataset[k]``.  Ties keep the lowest centroid index.
    """
    self.clusters = []
    for data in self.dataset:
        cluster = 0
        min_dist = eucledian_distance(data, self.centroids[0])
        for i in range(1, len(self.centroids)):
            # Compute each distance once; the original evaluated
            # eucledian_distance a second time after the comparison.
            dist = eucledian_distance(data, self.centroids[i])
            if dist < min_dist:
                min_dist = dist
                cluster = i
        self.clusters.append(cluster)
def predict(self, test_data):
    """Map each test point to the index of its nearest centroid.

    :param test_data: iterable of points comparable to the centroids.
    :return: list with one centroid index per test point.
    """
    assignments = []
    for point in test_data:
        best_idx = 0
        best_dist = eucledian_distance(self.centroids[0], point)
        for idx in range(1, len(self.centroids)):
            dist = eucledian_distance(self.centroids[idx], point)
            if dist < best_dist:
                best_dist, best_idx = dist, idx
        assignments.append(best_idx)
    return assignments
def scikmeans_predict(scikit_kmeans, test_data):
    """Assign each test point to its nearest scikit-learn KMeans center.

    :param scikit_kmeans: fitted ``sklearn.cluster.KMeans`` instance
        (only ``cluster_centers_`` is read).
    :param test_data: iterable of points.
    :return: list of center indices, one per test point.
    """
    centers = scikit_kmeans.cluster_centers_
    labels = []
    for point in test_data:
        best_idx = 0
        best_dist = eucledian_distance(centers[0], point)
        for idx in range(1, len(centers)):
            dist = eucledian_distance(centers[idx], point)
            if dist < best_dist:
                best_dist, best_idx = dist, idx
        labels.append(best_idx)
    return labels
def search_neighbor(self, data_index):
    """Return indices of all dataset points within ``self.eps`` of a point.

    The point at ``data_index`` itself is never included, even though its
    distance to itself is zero.

    :param data_index: index into ``self.dataset`` of the query point.
    :return: list of neighbour indices (strict ``< self.eps`` radius).
    """
    query = self.dataset[data_index]
    return [
        i
        for i in range(len(self.dataset))
        if i != data_index
        and eucledian_distance(self.dataset[i], query) < self.eps
    ]
def scidbscan_predict(scikitdbscan, train_data, test_data):
    """Label test points via the nearest clustered DBSCAN training point.

    Noise points (label ``-1``) in the training set are skipped during the
    nearest-neighbour search, so each test point inherits the label of the
    closest *clustered* training point.

    :param scikitdbscan: fitted ``sklearn.cluster.DBSCAN`` (``labels_`` read).
    :param train_data: points the DBSCAN model was fitted on.
    :param test_data: points to label.
    :return: list of cluster labels, one per test point.
    """
    result = []
    for td in test_data:
        closest_data = 0
        # float("inf") replaces the magic 9999 sentinel, which silently
        # mis-assigned any point farther than 9999 from every clustered
        # training point.  (Unused `test` list also removed.)
        min_distance = float("inf")
        for i in range(len(train_data)):
            distance = eucledian_distance(train_data[i], td)
            if distance < min_distance and scikitdbscan.labels_[i] != -1:
                min_distance = distance
                closest_data = i
        result.append(scikitdbscan.labels_[closest_data])
    return result
def predict(self, test_data):
    """Label each test point with the cluster of its nearest non-noise point.

    For every test point, finds the closest point in ``self.dataset`` whose
    label in ``self.cluster`` is not ``-1`` (noise) and returns that
    point's cluster label.

    :param test_data: iterable of points.
    :return: list of cluster labels, one per test point.
    """
    result = []
    for td in test_data:
        closest_data = 0
        # float("inf") replaces the magic 9999 sentinel, which silently
        # mis-assigned points farther than 9999 from every clustered point.
        # Leftover debug print and unused `test` list removed.
        min_distance = float("inf")
        for i in range(len(self.dataset)):
            distance = eucledian_distance(self.dataset[i], td)
            if distance < min_distance and self.cluster[i] != -1:
                min_distance = distance
                closest_data = i
        result.append(self.cluster[closest_data])
    return result
def predict_gaussian_naive_bayes(training_data, training_labels, test_data, test_labels, errors):
    """Fit a Gaussian Naive Bayes localizer and record per-sample errors.

    ``errors`` is extended in place with the Euclidean distance between each
    predicted location and the corresponding ground-truth location.

    :param training_data: feature vectors used to fit the classifier.
    :param training_labels: location tuples used as class labels.
    :param test_data: feature vectors to predict.
    :param test_labels: ground-truth location tuples, aligned with test_data.
    :param errors: output list of distances, mutated in place.
    """
    print("Predicting GNB.....")
    predictor = GaussianNB()
    # sklearn needs hashable scalar class labels, so the tuple locations are
    # stringified for fitting and parsed back after prediction.
    training_labels_str = [str(label) for label in training_labels]
    pred_labels_str = predictor.fit(training_data, training_labels_str).predict(test_data)
    pred_labels = [make_tuple(label) for label in pred_labels_str]
    for i, pred in enumerate(pred_labels):
        errors.append(utils.eucledian_distance(pred, test_labels[i]))
def generate_feature_vectors_at_centroid(K, T, SD, centroid, transmitter_locs, feature_vector, labels, seed):
    """Generate ``utils.FEATURES_PER_CELL`` power feature vectors for a centroid.

    Each feature vector holds one received-power sample per transmitter,
    produced by ``utils.generate_power_at_d`` from the centroid-to-transmitter
    distance.  Every generated vector is appended both to the returned list
    and to the shared ``feature_vector`` / ``labels`` accumulators (the label
    is the centroid itself).

    If the centroid coincides with any transmitter (distance 0) the centroid
    is ignored: an empty list is returned and the accumulators are left
    untouched — same observable outcome as the original mid-loop bail-out.

    :return: list of generated feature vectors (empty when the centroid sits
        on a transmitter).
    """
    # The distances depend only on the centroid, not on the sample index, so
    # compute them once instead of FEATURES_PER_CELL times per transmitter.
    distances = [utils.eucledian_distance(centroid, transmitter_locs[j])
                 for j in range(T)]
    if any(d == 0 for d in distances):
        # Centroid coincides with a transmitter: skip this centroid.
        return []
    feature_vectors_at_centroid = []
    for _ in range(utils.FEATURES_PER_CELL):
        features = [utils.generate_power_at_d(d, K, SD, seed) for d in distances]
        feature_vectors_at_centroid.append(features)
        feature_vector.append(features)
        labels.append(centroid)
    return feature_vectors_at_centroid
def get_init_means():
    """Build the initial mean vector for every grid-cell centroid.

    For each centroid the mean holds the expected received power from each of
    the ``utils.T`` transmitters: ``utils.POWER_T`` when the centroid sits
    exactly on a transmitter (distance 0), otherwise the noise-free
    (SD = 0) power at that distance.

    Leftover debug ``print(len(mean_init))`` removed.

    :return: list with one mean vector (length ``utils.T``) per centroid.
    """
    mean_init = []
    grid_indices, transmitter_locs = utils.generate_transmitter_locations(
        utils.K, utils.T, utils.SEED)
    centroids = get_locations()
    for centroid in centroids:
        mean_cell = []
        for j in range(utils.T):
            d = utils.eucledian_distance(transmitter_locs[j], centroid)
            if d == 0:
                # Centroid on top of a transmitter: use transmit power.
                mean_cell.append(utils.POWER_T)
            else:
                # SD = 0 -> deterministic path-loss power at distance d.
                mean_cell.append(
                    utils.generate_power_at_d(d, utils.K, 0, utils.SEED))
        mean_init.append(mean_cell)
    return mean_init
def predict(self, test_data, test_labels, res):
    """Predict a location for each test sample and write a JSON response.

    Runs ``self.gmm.predict`` over ``test_data`` in ``BATCH_SIZE`` chunks,
    maps each predicted class to a location via ``self.get_location``, and
    writes ``{"predictions": [...], "avg_error": float}`` to ``res.body``,
    where each error is the Euclidean distance to the matching entry of
    ``test_labels``.
    """
    print("Testing")
    errors = []
    pred_classes = []
    pred_locs = []
    for start_index in range(0, len(test_data), BATCH_SIZE):
        test_batch = test_data[start_index:start_index + BATCH_SIZE]
        pred_classes.extend(self.gmm.predict(test_batch).tolist())
    for i, pred_class in enumerate(pred_classes):
        loc = self.get_location(pred_class)
        pred_locs.append(loc)
        errors.append(utils.eucledian_distance(loc, test_labels[i]))
    # Guard the empty-input case: the original raised ZeroDivisionError
    # when test_data (and hence errors) was empty.
    avg_error = float(sum(errors)) / len(errors) if errors else 0.0
    response = {
        "predictions": pred_locs,
        "avg_error": avg_error
    }
    res.body = json.dumps(response)