def distances_sum(point, point_array, distance_method=('euclidean', 2)):
    """Sum the powered distances from ``point`` to every entry of ``point_array``.

    Parameters
    ----------
    point : array-like
        Two-dimensional container; only its first row
        (``np.asarray(point)[0]``) is used as the reference point.
    point_array : array-like
        One point per entry along the first axis.  Every entry is flattened
        with ``np.asarray(entry)[0]`` before it is measured.
        NOTE(review): this indexing suggests ``np.matrix`` rows -- confirm
        against the callers.
    distance_method : tuple
        ``(name, exponent)``.  ``name`` selects ``'euclidean'``,
        ``'manhattan'`` or ``'minkowski'``; any other name falls back to the
        euclidean metric.  ``exponent`` is the power every distance is raised
        to, and is also handed to the minkowski metric as its order.

    Returns
    -------
    float
        The accumulated sum; ``0.0`` when ``point_array`` has no entries.
    """
    metric_name, exponent = distance_method[0], distance_method[1]
    n_points = np.shape(point_array)[0]
    point = np.asarray(point)[0]

    total = float(0)
    for j in range(n_points):
        # Flatten the entry the same way for every metric.  The fallback
        # branch used to skip this step and handed the raw entry to the
        # euclidean helper, unlike the explicit 'euclidean' branch.
        row = np.asarray(point_array[j])[0]
        if metric_name == 'manhattan':
            dist_j = distance.manhattan_distance(point, row)
        elif metric_name == 'minkowski':
            dist_j = distance.minkowski_distance(point, row, exponent)
        else:
            # 'euclidean' and every unrecognised name
            dist_j = distance.euclidean_distance(point, row)
        total += np.power(dist_j, exponent)
    return total
def find_distance_min(point, point_array, distance_method=('euclidean', 2)):
    """Find the entry of ``point_array`` that lies closest to ``point``.

    Parameters
    ----------
    point : array-like
        Two-dimensional container; only ``np.asarray(point)[0]`` is used.
    point_array : array-like
        Candidate points, one per entry along the first axis.  Entries are
        passed to the metric exactly as indexing returns them.
    distance_method : tuple
        ``(name, order)``.  ``'manhattan'`` and ``'minkowski'`` select those
        metrics (``order`` is only used by minkowski); every other name,
        including ``'euclidean'``, uses the euclidean metric.

    Returns
    -------
    tuple
        ``(index, distance)`` of the nearest entry, or ``(-1, inf)`` when
        ``point_array`` has no entries.
    """
    n_candidates = np.shape(point_array)[0]
    point = np.asarray(point)[0]

    best_index, best_distance = -1, float('inf')
    for idx in range(n_candidates):
        candidate = point_array[idx]
        metric_name = distance_method[0]
        if metric_name == 'manhattan':
            current = distance.manhattan_distance(point, candidate)
        elif metric_name == 'minkowski':
            current = distance.minkowski_distance(point, candidate,
                                                  distance_method[1])
        else:
            # Covers 'euclidean' as well as any unknown metric name.
            # (Alternative noted in the original: fall back to
            # distance.minkowski_distance(point, candidate, distance_method[1]).)
            current = distance.euclidean_distance(point, candidate)

        # Strict comparison: on ties the earliest entry is kept.
        if current < best_distance:
            best_index, best_distance = idx, current
    return best_index, best_distance
def window(prot1, prot2, instance, w):
    """Tell whether ``instance`` passes the relative-distance window test.

    The euclidean distances from ``instance`` to ``prot1`` and ``prot2`` are
    compared through the smaller of their two ratios; the test passes when
    that ratio is strictly greater than ``(1 - w) / (1 + w)``.  If either
    distance is zero the ratio is taken to be 0.

    NOTE(review): this looks like the window rule used by LVQ2.1-style
    prototype updates -- confirm with the caller.

    Returns
    -------
    bool
        Result of ``ratio > (1 - w) / (1 + w)``.
    """
    dist_first = dt.euclidean_distance(instance, prot1)
    dist_second = dt.euclidean_distance(instance, prot2)

    if dist_first == 0 or dist_second == 0:
        # A zero distance would make one of the ratios undefined.
        ratio = 0
    else:
        ratio = min(dist_first / dist_second, dist_second / dist_first)

    threshold = (1 - w) / (1 + w)
    return ratio > threshold
def __qp(self, X, kernel, C):
    """Solve the quadratic program and store support vectors, center and radius.

    The problem handed to ``cvxopt.solvers.qp`` uses ``P = 2 * kernel``,
    ``q = -diag(kernel)``, the box constraint ``0 <= alpha <= C`` (encoded in
    ``G``/``h``) and the equality ``sum(alpha) == 1`` (encoded in ``A``/``b``).

    NOTE(review): these matrices match the dual of a minimum-enclosing-ball /
    SVDD-style problem -- confirm against the class documentation.

    Parameters
    ----------
    X : array, indexed by sample along the first axis
    kernel : square array indexed ``[i, j]`` over the samples of ``X``
    C : upper bound applied to every multiplier

    Side effects
    ------------
    Sets ``__X_support`` / ``__a_support`` (samples and multipliers whose
    multiplier is not close to zero), ``__center`` (multiplier-weighted sum of
    the support samples) and ``__radius`` (mean distance from the center to
    the support samples whose multiplier is below ``C``).
    """
    n_samples = X.shape[0]
    diagonal = range(n_samples)
    identity = np.eye(n_samples)

    # Objective terms.
    P = 2 * kernel
    q = -kernel[diagonal, diagonal].reshape(-1, 1)
    # Inequalities: -alpha <= 0 stacked on top of alpha <= C.
    G = np.vstack((-identity, identity))
    h = np.hstack((np.zeros(n_samples), np.full(n_samples, C)))
    # Equality: the multipliers sum to one.
    A = np.full((1, n_samples), 1.0)
    b = np.ones(1)

    qp_arguments = [cvxopt.matrix(term) for term in (P, q, G, h, A, b)]
    solution = cvxopt.solvers.qp(*qp_arguments)
    alpha = np.array(solution['x']).ravel()

    # Keep only the samples with a multiplier that is not numerically zero.
    support_items = np.flatnonzero(np.logical_not(np.isclose(alpha, 0)))
    self.__X_support = X[support_items]
    self.__a_support = alpha[support_items]

    # Support samples whose multiplier is strictly below the bound C.
    below_bound = np.flatnonzero(self.__a_support < C)
    X_free = self.__X_support[below_bound]

    self.__center = self.__a_support.dot(self.__X_support)
    self.__radius = np.mean(
        distance.euclidean_distance(self.__center, X_free))
def kNN(k, data, instance):
    """
    Return the class label(s) with the most votes among the k rows of the
    data set that are closest to a given instance.

    Parameters:
        k        -- number of nearest rows that get a vote
        data     -- dict with 'data' (rows whose last element is the class
                    label and whose other elements are convertible to float)
                    and 'attributes' (the last entry's last element is the
                    collection of possible class labels)
        instance -- feature values of the instance to classify

    Returns a list of labels; it holds more than one label when several
    classes tie for the highest vote count.  ``data`` is left untouched, so
    the function can be called repeatedly on the same data set (earlier
    versions rewrote the rows in place and appended a distance to each one,
    which corrupted every later call).
    """
    # initialize a dict with all votes to 0
    votes = {label: 0 for label in data['attributes'][-1][-1]}

    # pair each row's distance with its label without modifying the row
    scored = []
    for row in data['data']:
        # convert into float to be able to calculate euclidean distance
        features = [float(value) for value in row[:-1]]
        scored.append((euclidean_distance(instance, features), row[-1]))

    # stable sort on the distance only, so ties keep their data-set order
    scored.sort(key=operator.itemgetter(0))

    # the K rows with the shortest distance each cast one vote
    for _, label in scored[:k]:
        votes[label] += 1

    if not votes:
        return []

    # every label that reaches the highest count is a candidate
    top_count = max(votes.values())
    return [label for label, count in votes.items() if count == top_count]
def k_neighbors(self, unknown, dataset, k):
    """
    Build the list of the k closest neighbors of ``unknown``.

    ``dataset`` maps a title to its point.  Every entry is turned into a
    ``[distance, title]`` pair, the pairs are sorted in ascending order
    (distance first, title as tie-breaker) and the first ``k`` are returned.
    """
    ranked = sorted(
        [distance.euclidean_distance(dataset[title], unknown), title]
        for title in dataset
    )
    return ranked[:k]
def fit(self, X, y, learning_rate, epochs):
    '''
    Train one prototype per class by repeated nearest-prototype updates.

    Each class starts from one of its own samples, chosen at random.  Every
    epoch then draws a single random sample, finds the nearest prototype and
    moves it toward the sample when their labels agree, away otherwise.

    Parameters
    ----------
    X : shape (n_samples, n_features)
        Training data
    y : shape (n_samples,)
        Target values (stored in a float array, so they must be numeric)
    learning_rate : learning rate
    epochs : The number of epochs (one random sample is used per epoch)
    '''
    n_samples, n_features = X.shape
    classes = np.unique(y)
    n_classes = len(classes)

    self.__prototypes = np.zeros((n_classes, n_features))
    self.__prototypes_labels = np.zeros(n_classes)

    for i, label in enumerate(classes):
        # Take the drawn index as a scalar: storing a shape-(1,) array into
        # a single array element relies on an implicit array-to-scalar
        # conversion that NumPy has deprecated.
        chosen = np.random.choice(np.flatnonzero(y == label), 1)[0]
        self.__prototypes[i] = X[chosen]
        self.__prototypes_labels[i] = y[chosen]

    for _ in range(epochs):
        picked = np.random.choice(n_samples, 1)
        sample = X[picked]          # kept 2-D, shape (1, n_features)
        target = y[picked][0]

        distances = distance.euclidean_distance(sample, self.__prototypes)
        nearest_index = np.argmin(distances)

        step = learning_rate * (
            sample - self.__prototypes[nearest_index]).ravel()
        if self.__prototypes_labels[nearest_index] == target:
            # Same class: pull the prototype toward the sample.
            self.__prototypes[nearest_index] += step
        else:
            # Different class: push the prototype away from the sample.
            self.__prototypes[nearest_index] -= step
def dist(x, y):
    """Combine a distance over the first two columns with one over the rest.

    ``x`` is indexed as a 2-D array (``x[:, :2]`` / ``x[:, 2:]``) and ``y``
    as a 1-D array (``y[:2]`` / ``y[2:]``).  The two euclidean distances are
    merged as ``sqrt(first**2 + 5 * rest**2)``, i.e. the distance over the
    remaining columns carries a fixed weight of 5.

    NOTE(review): the local names in the original suggest the first two
    columns are a position and the rest pixel values -- confirm.
    """
    position_gap = euclidean_distance(x[:, :2], y[:2])
    pixel_gap = euclidean_distance(x[:, 2:], y[2:])
    weighted_square_sum = position_gap**2 + 5 * pixel_gap**2
    return np.sqrt(weighted_square_sum)
def test_known3():
    """The distance from the origin to (-3, -4) is 5 (a 3-4-5 triangle)."""
    origin = np.array([0, 0])
    corner = np.array([-3, -4])
    measured = euclidean_distance(origin, corner)
    assert_almost_equal(measured, 5)
def test_known1():
    """In one dimension the distance between 0 and 3 is 3."""
    start = np.array([0])
    end = np.array([3])
    measured = euclidean_distance(start, end)
    assert_almost_equal(measured, 3)
def test_triangle():
    """Triangle inequality: d(a, c) <= d(a, b) + d(b, c) for random points."""
    first = np.random.random(3)
    middle = np.random.random(3)
    last = np.random.random(3)

    direct = euclidean_distance(first, last)
    leg_one = euclidean_distance(first, middle)
    leg_two = euclidean_distance(middle, last)
    assert direct <= leg_one + leg_two
def test_symmetry():
    """Swapping the arguments must not change the distance (10 random pairs)."""
    for _ in range(10):
        left = np.random.random(3)
        right = np.random.random(3)
        forward = euclidean_distance(left, right)
        backward = euclidean_distance(right, left)
        assert forward == backward
def test_when_not_zero():
    """A random point in [0, 1)^3 has a non-zero distance to the origin."""
    for _ in range(10):
        sample = np.random.random(3)
        origin = np.zeros(3)
        measured = euclidean_distance(sample, origin)
        assert measured != 0
def test_when_zero():
    """Two all-zero vectors are at distance 0 from each other."""
    zeros_a = np.zeros(3)
    zeros_b = np.zeros(3)
    measured = euclidean_distance(zeros_a, zeros_b)
    assert measured == 0
def test_non_negative():
    """The distance between two random 3-vectors is never negative.

    The vectors are drawn with ``np.random.normal(size=3)`` so that negative
    coordinates are exercised too.  The earlier ``np.random.normal(3)`` passed
    3 as the *mean* and produced a single scalar, so no vector was ever
    tested.
    """
    for _ in range(10):
        u = np.random.normal(size=3)
        v = np.random.normal(size=3)
        assert euclidean_distance(u, v) >= 0
from distance import euclidean_distance


def main():
    """Print several summaries of the distances between fixed point pairs.

    Output, in order: the distances as computed, the distances sorted in
    ascending order, the smallest one, the two largest ones, and the rounded
    distances that are even.

    Every ``print`` call takes a single argument so the script behaves the
    same whether ``print`` is a statement (Python 2) or a function
    (Python 3); the original print statements were a syntax error on
    Python 3.
    """
    pairs_of_points = [((0, 0), (0, 1)), ((0, 0), (1.5, 0)),
                       ((0, 3), (4, 0)), ((0, 0), (1, 1)),
                       ((-1, 0), (1, 1)), ((0, 0), (1, 3))]

    print('The distances are:')
    distances = [euclidean_distance(start, end)
                 for start, end in pairs_of_points]
    print(distances)

    print('The distances in ascending order are:')
    distances.sort()
    print(distances)

    print('The minimal distance in the list is:')
    print(distances[0])

    print('The two largest distances in the list are:')
    # Formatted into one string: print(a, b) would show a tuple on Python 2.
    print('%s %s' % (distances[-2], distances[-1]))

    print('The distances whose round value is even are:')
    # A list (not filter()) so the values are shown on Python 3 as well.
    rounded = [round(value) for value in distances]
    print([value for value in rounded if value % 2 == 0])


if __name__ == '__main__':
    main()
# Show the first four stemmed tweets next to their TF-IDF vectors, each
# followed by an empty line.
tweet_labels = ("Tweet 1: ", "Tweet 2: ", "Tweet 3: ", "Tweet 4: ")
for position, label in enumerate(tweet_labels):
    print(label + str(z_stemmerem[position]) + ", TFIDF: "
          + str(bag_of_words[position]))
    print()

# Compare the first two TF-IDF vectors with three different measures
# (the headings are Polish: Jaccard, Euclidean and cosine distance).
print("DYSTANS JACCARDA")
jaccard_value = distance.jaccard_distance(bag_of_words[0], bag_of_words[1])
print(jaccard_value)
print()
print("DYSTANS EUKLIDESA")
euclidean_value = distance.euclidean_distance(bag_of_words[0],
                                              bag_of_words[1])
print(euclidean_value)
print()
print("DYSTANS COSINE")
cosine_value = distance.cosine_distance(bag_of_words[0], bag_of_words[1])
print(cosine_value)
print()

# TF-IDF demonstration on the first entry of the corpus.
print(nowynowy)
print("Demnostracja tf_idf")
tf_idf_demo = calculate_tf_idf(tweet=nowynowy[0], corpus=nowynowy)
print(tf_idf_demo)

# kmeans test
print('----------------KMEANS --------------')
tweet1 = ['poland', 'poland', 'poland', 'poland', 'good', 'small', 'time']
tweet2 = ['poland', 'poland', 'poland', 'bad', 'big', 'coronavirus']
tweet3 = ['poland', 'poland', 'poland', 'growth', 'small', 'time']
tweet4 = ['poland', 'poland', 'poland', 'bad', 'big', 'virus']