Example no. 1
def distances_sum(point, point_array, distance_method=('euclidean', 2)):
    distances = 0.0

    m = np.shape(point_array)[0]
    # Flatten the point (e.g. a 1 x n matrix row) into a 1-D array.
    point = np.asarray(point)[0]

    for j in range(m):
        if distance_method[0] == 'euclidean':
            distances += np.power(
                distance.euclidean_distance(point,
                                            np.asarray(point_array[j])[0]),
                distance_method[1])
        elif distance_method[0] == 'manhattan':
            distances += np.power(
                distance.manhattan_distance(point,
                                            np.asarray(point_array[j])[0]),
                distance_method[1])
        elif distance_method[0] == 'minkowski':
            distances += np.power(
                distance.minkowski_distance(point,
                                            np.asarray(point_array[j])[0],
                                            distance_method[1]),
                distance_method[1])
        else:
            distances += np.power(
                distance.euclidean_distance(point,
                                            np.asarray(point_array[j])[0]),
                distance_method[1])
        # Alternatively, the three branches collapse into a single call:
        # distances += np.power(
        #     distance.minkowski_distance(point, np.asarray(point_array[j])[0],
        #                                 distance_method[1]),
        #     distance_method[1])

    return distances
Example no. 2
def find_distance_min(point, point_array, distance_method=('euclidean', 2)):
    min_distance = float('inf')
    min_centroid_index = -1

    k = np.shape(point_array)[0]

    point = np.asarray(point)[0]

    for j in range(k):
        centroid_j = point_array[j]
        if distance_method[0] == 'euclidean':
            distance_j = distance.euclidean_distance(point, centroid_j)
        elif distance_method[0] == 'manhattan':
            distance_j = distance.manhattan_distance(point, centroid_j)
        elif distance_method[0] == 'minkowski':
            distance_j = distance.minkowski_distance(point, centroid_j,
                                                     distance_method[1])
        else:
            distance_j = distance.euclidean_distance(point, centroid_j)
        # Alternatively, a single Minkowski call covers all three cases:
        # distance_j = distance.minkowski_distance(point, centroid_j,
        #                                          distance_method[1])

        if distance_j < min_distance:
            min_distance = distance_j
            min_centroid_index = j

    return min_centroid_index, min_distance
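Examples no. 1 and no. 2 lean on a small `distance` helper module exposing `euclidean_distance`, `manhattan_distance` and `minkowski_distance`. That module is not shown here, so the sketch below is an assumption inferred from the call sites rather than the original implementation; it broadcasts over the last axis because later examples (the LVQ `fit` method and the SVDD radius computation) pass whole arrays of points to `euclidean_distance`.

import numpy as np

def minkowski_distance(u, v, p):
    # p-norm of the difference vector; p=1 is Manhattan, p=2 is Euclidean.
    # Summing over the last axis lets a single point be compared against a
    # whole array of points in one call.
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)
    return np.sum(np.abs(u - v) ** p, axis=-1) ** (1.0 / p)

def euclidean_distance(u, v):
    return minkowski_distance(u, v, 2)

def manhattan_distance(u, v):
    return minkowski_distance(u, v, 1)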
Example no. 3
def window(prot1, prot2, instance, w):
    # LVQ2.1-style window test: an update is only allowed when the instance
    # falls close enough to the midplane between the two prototypes.
    di = dt.euclidean_distance(instance, prot1)
    dj = dt.euclidean_distance(instance, prot2)
    if di != 0 and dj != 0:
        mini = min(di / dj, dj / di)
    else:
        mini = 0
    s = (1 - w) / (1 + w)

    return mini > s
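A minimal usage sketch for the window test above, assuming `dt` provides the same kind of distance helpers sketched earlier and taking 0.3 as a typical window width (both are assumptions, not values from the source):

import numpy as np

# Two competing prototypes and a sample that lies near the midplane between them.
prot_a = np.array([0.0, 0.0])
prot_b = np.array([1.0, 0.0])
sample = np.array([0.45, 0.1])
if window(prot_a, prot_b, sample, w=0.3):
    print('sample falls inside the update window')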
Example no. 4
    def __qp(self, X, kernel, C):
        # Solve the SVDD dual in cvxopt form: minimize a^T K a - diag(K)^T a
        # subject to 0 <= a_i <= C and sum(a) = 1.
        n_samples = X.shape[0]

        P = 2 * kernel
        q = -kernel[range(n_samples), range(n_samples)].reshape(-1, 1)

        # Box constraints 0 <= alpha <= C, stacked as G @ alpha <= h.
        G = np.vstack((-np.eye(n_samples), np.eye(n_samples)))
        h = np.hstack((np.zeros(n_samples), np.full(n_samples, C)))

        # Equality constraint sum(alpha) = 1.
        A = np.full((1, n_samples), 1.0)
        b = np.ones(1)

        res = cvxopt.solvers.qp(cvxopt.matrix(P), cvxopt.matrix(q),
                                cvxopt.matrix(G), cvxopt.matrix(h),
                                cvxopt.matrix(A), cvxopt.matrix(b))
        alpha = np.array(res['x']).ravel()

        # Support vectors have non-zero alpha; the free ones also satisfy alpha < C.
        support_items = np.flatnonzero(~np.isclose(alpha, 0))
        self.__X_support = X[support_items]
        self.__a_support = alpha[support_items]

        free_items = np.flatnonzero(self.__a_support < C)
        X_free = self.__X_support[free_items]

        # Center of the description and mean distance to the free support vectors.
        self.__center = self.__a_support.dot(self.__X_support)
        self.__radius = np.mean(distance.euclidean_distance(self.__center, X_free))
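The fitted `__center` and `__radius` suggest a straightforward decision rule: a sample lies inside the learned description when its distance to the center does not exceed the radius. The standalone sketch below illustrates that rule; `svdd_predict` is a hypothetical helper, not part of the original class.

import numpy as np

def svdd_predict(X, center, radius):
    # Hypothetical decision rule: +1 inside the learned sphere, -1 outside.
    d = np.sqrt(np.sum((np.asarray(X, dtype=float)
                        - np.asarray(center, dtype=float)) ** 2, axis=-1))
    return np.where(d <= radius, 1, -1)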
Example no. 5
def kNN(k, data, instance):
    """
    Returns a list with the k instances in the data set closest to a given instance
    """
    # Extract the real data
    classification = data['data']
    # Convert features to float so the Euclidean distance can be computed
    for index, li in enumerate(classification):
        aux_list = list(float(x) for x in li[:-1])
        classification[index] = aux_list + [li[-1]]

    # initialize a dict with all votes to 0
    classification_dict = {val: 0 for val in data['attributes'][-1][-1]}
    # add the distance to the instance in a new field
    for i in classification:
        i.append(euclidean_distance(instance, i[:-1]))
    # Sort the classification by the last element
    sorted_classification = sorted(classification, key=operator.itemgetter(-1))

    # Count the votes of the k elements with the shortest distance
    for x in sorted_classification[:k]:
        classification_dict[x[-2]] += 1

    # Generate final candidates
    candidates_list = list()
    maximum_value = -1
    for key, val in classification_dict.items():
        if val > maximum_value:
            candidates_list = [key]
            maximum_value = val
        elif val == maximum_value:
            candidates_list.append(key)

    return candidates_list
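The `data` argument appears to follow an ARFF-style layout: `data['data']` holds rows whose last field is the class label, and the last entry of `data['attributes']` lists the possible class values. That structure is an inference from the indexing above, and the values below are made up purely for illustration:

dataset = {
    'attributes': [('sepal_length', 'REAL'),
                   ('sepal_width', 'REAL'),
                   ('class', ['setosa', 'versicolor'])],
    'data': [['5.1', '3.5', 'setosa'],
             ['4.9', '3.0', 'setosa'],
             ['7.0', '3.2', 'versicolor']],
}
print(kNN(3, dataset, [5.0, 3.4]))  # -> ['setosa'] for these made-up rows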
Example no. 6
    def k_neighbors(self, unknown, dataset, k):
        """
        Generate the list of the k closest neighbors
        """
        distances = []
        for title in dataset:
            point = dataset[title]
            distance_to_point = distance.euclidean_distance(point, unknown)
            distances.append([distance_to_point, title])
        distances.sort()
        neighbors = distances[:k]
        return neighbors
Example no. 7
    def fit(self, X, y, learning_rate, epochs):
        '''
        Parameters
        ----------
        X : shape (n_samples, n_features)
            Training data
        y : shape (n_samples,)
            Target values
        learning_rate : learning rate
        epochs : The number of epochs
        '''
        n_samples, n_features = X.shape

        classes = np.unique(y)
        n_classes = len(classes)

        self.__prototypes = np.zeros((n_classes, n_features))
        self.__prototypes_labels = np.zeros(n_classes)
        # Initialize one prototype per class from a random sample of that class.
        for i in range(n_classes):
            index_prototype = np.random.choice(np.flatnonzero(y == classes[i]),
                                               1)
            self.__prototypes[i] = X[index_prototype]
            self.__prototypes_labels[i] = y[index_prototype]

        for _ in range(epochs):
            # Pick one random training sample per iteration.
            index = np.random.choice(n_samples, 1)

            # Find the prototype closest to the sample.
            distances = distance.euclidean_distance(X[index],
                                                    self.__prototypes)
            nearest_index = np.argmin(distances)

            # LVQ update: attract the winning prototype if the labels match,
            # repel it otherwise.
            if self.__prototypes_labels[nearest_index] == y[index]:
                self.__prototypes[nearest_index] += learning_rate * (
                    X[index] - self.__prototypes[nearest_index]).ravel()
            else:
                self.__prototypes[nearest_index] -= learning_rate * (
                    X[index] - self.__prototypes[nearest_index]).ravel()
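Prediction with a model trained this way would typically assign each sample the label of its nearest prototype. The standalone sketch below shows that rule; `lvq_predict` and its arguments are hypothetical names standing in for the `__prototypes` and `__prototypes_labels` attributes fitted above.

import numpy as np

def lvq_predict(X, prototypes, prototype_labels):
    # Hypothetical nearest-prototype classifier.
    labels = np.empty(len(X))
    for i, x in enumerate(X):
        distances = np.sqrt(np.sum((prototypes - x) ** 2, axis=1))
        labels[i] = prototype_labels[np.argmin(distances)]
    return labels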
Example no. 8
def dist(x, y):
    # Combine a spatial distance (first two columns) with a pixel-value
    # distance (remaining columns), weighting the latter by a factor of 5.
    pos_d = euclidean_distance(x[:, :2], y[:2])
    pix_d = euclidean_distance(x[:, 2:], y[2:])
    d = np.sqrt(pos_d**2 + 5 * pix_d**2)
    return d
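A usage sketch under the assumption that `x` carries one row per candidate, with two position columns followed by pixel features, and `y` is a single reference point in the same layout (the numbers are made up):

import numpy as np

candidates = np.array([[0.0, 0.0, 10.0, 20.0],
                       [3.0, 4.0, 10.0, 20.0]])
reference = np.array([0.0, 0.0, 10.0, 21.0])
print(dist(candidates, reference))  # one combined distance per candidate row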
Example no. 9
def test_known3():
    u = np.array([0, 0])
    v = np.array([-3, -4])
    assert_almost_equal(euclidean_distance(u, v), 5)
Example no. 10
def test_known1():
    u = np.array([0])
    v = np.array([3])
    assert_almost_equal(euclidean_distance(u, v), 3)
Example no. 11
def test_triangle():
    u = np.random.random(3)
    v = np.random.random(3)
    w = np.random.random(3)
    assert euclidean_distance(
        u, w) <= euclidean_distance(u, v) + euclidean_distance(v, w)
Example no. 12
def test_symmetry():
    for i in range(10):
        u = np.random.random(3)
        v = np.random.random(3)
        assert euclidean_distance(u, v) == euclidean_distance(v, u)
Example no. 13
def test_when_not_zero():
    for i in range(10):
        u = np.random.random(3)
        v = np.zeros(3)
        assert euclidean_distance(u, v) != 0
Example no. 14
def test_when_zero():
    u = np.zeros(3)
    v = np.zeros(3)
    assert euclidean_distance(u, v) == 0
Example no. 15
def test_non_negative():
    for i in range(10):
        u = np.random.normal(size=3)
        v = np.random.normal(size=3)
        assert euclidean_distance(u, v) >= 0
Example no. 16
from distance import euclidean_distance

if __name__ == '__main__':
    pairs_of_points = [((0, 0), (0, 1)), ((0, 0), (1.5, 0)), ((0, 3), (4, 0)),
                       ((0, 0), (1, 1)), ((-1, 0), (1, 1)), ((0, 0), (1, 3))]
    distances = []

    print('The distances are:')
    for pair in pairs_of_points:
        distances.append(euclidean_distance(pair[0], pair[1]))
    print(distances)

    print('The distances in ascending order are:')
    distances.sort()
    print(distances)

    print('The minimal distance in the list is:')
    print(distances[0])

    print('The two largest distances in the list are:')
    print(distances[-2], distances[-1])

    print('The distances whose rounded value is even are:')
    print([rounded for rounded in (round(d) for d in distances)
           if rounded % 2 == 0])
Example no. 17
print("Tweet 1: " + str(z_stemmerem[0]) + ", TFIDF: " + str(bag_of_words[0]))

print()
print("Tweet 2: " + str(z_stemmerem[1]) + ", TFIDF: " + str(bag_of_words[1]))
print()
print("Tweet 3: " + str(z_stemmerem[2]) + ", TFIDF: " + str(bag_of_words[2]))

print()
print("Tweet 4: " + str(z_stemmerem[3]) + ", TFIDF: " + str(bag_of_words[3]))
print()

print("DYSTANS JACCARDA")
print(distance.jaccard_distance(bag_of_words[0], bag_of_words[1]))
print()
print("DYSTANS EUKLIDESA")
print(distance.euclidean_distance(bag_of_words[0], bag_of_words[1]))
print()
print("DYSTANS COSINE")
print(distance.cosine_distance(bag_of_words[0], bag_of_words[1]))

print()
print(nowynowy)
print("Demnostracja tf_idf")
print(calculate_tf_idf(tweet=nowynowy[0], corpus=nowynowy))

# kmeans test
print('----------------KMEANS --------------')
tweet1 = ['poland', 'poland', 'poland', 'poland', 'good', 'small', 'time']
tweet2 = ['poland', 'poland', 'poland', 'bad', 'big', 'coronavirus']
tweet3 = ['poland', 'poland', 'poland', 'growth', 'small', 'time']
tweet4 = ['poland', 'poland', 'poland', 'bad', 'big', 'virus']