Example #1
from scipy.spatial.distance import cosine as cosine_dist
import numpy as np


def robustness(target, target_expl, neighborhood, neighborhood_expl):
    # Worst-case ratio of explanation change to input change over a
    # neighborhood of perturbed inputs.
    ratios = []
    for i, neighbor in enumerate(neighborhood):
        ratio = cosine_dist(target_expl, neighborhood_expl[i]) / cosine_dist(
            target, neighbor)
        ratios.append(ratio)
    return max(ratios)


def cosine_sim(vector, centroid):
    # Flatten inputs to 1-D arrays so cosine_dist returns a scalar and
    # never raises on 2-D input such as np.matrix.
    vector = np.asarray(vector).ravel()
    centroid = np.asarray(centroid).ravel()
    return 1 - cosine_dist(vector, centroid)
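A minimal usage sketch with made-up toy vectors (the inputs and explanations are assumed to be plain 1-D arrays):

import numpy as np

target = np.array([1.0, 0.0, 0.0])
target_expl = np.array([0.9, 0.1, 0.0])
neighborhood = [np.array([0.9, 0.1, 0.0]), np.array([0.8, 0.0, 0.2])]
neighborhood_expl = [np.array([0.7, 0.3, 0.0]), np.array([0.2, 0.8, 0.0])]

# larger ratios mean the explanation shifts more per unit of input change
print(robustness(target, target_expl, neighborhood, neighborhood_expl))
print(cosine_sim([1, 0], [1, 1]))  # ~0.707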
Example #3
    def _calculate_dist(self, strategy='l2_dist'):
        assert strategy in ['l2_dist', 'cosine']
        if strategy == 'l2_dist':
            # squared L2 distance between paired source/target embeddings
            diff = np.subtract(self.source_embeddings, self.target_embeddings)
            dist = np.sum(np.square(diff), 1)
        else:
            # per-pair cosine distance
            dist = [
                cosine_dist(self.source_embeddings[i],
                            self.target_embeddings[i])
                for i in range(len(self.source_embeddings))
            ]
            dist = np.stack(dist, axis=0)
        return dist
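For reference, a quick check of what the two strategies compute, using toy paired embeddings in plain numpy outside the class:

import numpy as np
from scipy.spatial.distance import cosine as cosine_dist

source_embeddings = np.array([[1.0, 0.0], [0.0, 1.0]])
target_embeddings = np.array([[1.0, 1.0], [0.0, 2.0]])

# 'l2_dist' strategy: squared L2 distance per pair
print(np.sum(np.square(source_embeddings - target_embeddings), 1))  # [1. 1.]

# 'cosine' strategy: cosine distance per pair
print(np.stack([cosine_dist(s, t)
                for s, t in zip(source_embeddings, target_embeddings)]))
# [0.2928..., 0.]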
Example #4
import bisect

import numpy as np
import scipy.sparse
from scipy.spatial.distance import cosine as cosine_dist


def get_nonego_simmilarity_vec(source_node, data, ind_list, cutoff,
                               simm_fun=lambda x, y: 1 - cosine_dist(x, y),
                               sparseflag=False):
    """
    Returns a vector holding the similarity scores of the `cutoff` nodes
    from `ind_list` most similar to `source_node`.
    :param source_node: row index of the reference node in `data`
    :param data: node-by-feature matrix
    :param ind_list: candidate node indices to compare against
    :param cutoff: number of top-ranked nodes to keep
    :param simm_fun: similarity function (defaults to cosine similarity)
    :param sparseflag: if True, return a scipy.sparse.dok_matrix
    :return: 1 x n_nodes vector with scores for the top `cutoff` nodes
    """
    f_ref = data[source_node]
    out = np.zeros((1, data.shape[0]))
    out = scipy.sparse.dok_matrix(out) if sparseflag else out
    rank = []
    # insort keeps `rank` sorted ascending by similarity score
    for n in ind_list:
        bisect.insort(rank, (simm_fun(f_ref, data[n, :]), n))
    # walk from most to least similar and keep the top `cutoff`
    for r, n in rank[::-1][:cutoff]:
        out[0, n] = r
    return out
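A toy usage sketch, with `data` as a small dense node-by-feature matrix:

import numpy as np

data = np.array([[1.0, 0.0],
                 [0.9, 0.1],
                 [0.0, 1.0],
                 [0.5, 0.5]])
# rank nodes 1-3 by cosine similarity to node 0 and keep the top 2
vec = get_nonego_simmilarity_vec(0, data, ind_list=[1, 2, 3], cutoff=2)
print(vec)  # nonzero only at the two most similar nodes, 1 and 3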
Example #5
    def _appearance_comparison(self, bbox0, bbox1, im0, im1, window_sz, model):
        '''
        Computes appearance characteristics using `model` between objects in
        `bbox0` and `bbox1` in `im0` and `im1` respectively.

        Returns the distance between appearance features as a cosine distance.

        Parameters
        ----------

        bbox0 : ndarray. length 4, (min_row, min_col, max_row, max_col)
            bounding box in `im0`.
        bbox1 : ndarray. length 4, (min_row, min_col, max_row, max_col)
            bounding box in `im1`.
        im0, im1 : ndarray.
            images containing `bbox0`, `bbox1` respectively.
        window_sz : integer.
            size of windows for feature extraction.
        model : object with a `.predict(im)` method that extracts appearance
            features.

        Returns
        -------
        cd : float. [0, 1]. cosine distance between appearance features.
        '''
        from scipy.spatial.distance import cosine as cosine_dist

        # pad channels dimension as "RGB" for 2D intensity images
        if len(im0.shape) < 3:
            im0 = np.stack([im0] * 3, -1)
        if len(im1.shape) < 3:
            im1 = np.stack([im1] * 3, -1)

        # pad images to ensure bboxes never run off the edge
        im0p = np.pad(im0,
                      ((window_sz, window_sz), (window_sz, window_sz), (0, 0)),
                      mode='reflect')
        im1p = np.pad(im1,
                      ((window_sz, window_sz), (window_sz, window_sz), (0, 0)),
                      mode='reflect')

        # move bboxes to fit new dimensional indexing of the image
        new_bbox0 = bbox0.copy() + window_sz
        new_bbox1 = bbox1.copy() + window_sz

        new_bbox0 = new_bbox0.astype('int32')
        new_bbox1 = new_bbox1.astype('int32')

        im0_roi = im0p[new_bbox0[0]:new_bbox0[2], new_bbox0[1]:new_bbox0[3], :]
        im1_roi = im1p[new_bbox1[0]:new_bbox1[2], new_bbox1[1]:new_bbox1[3], :]

        import keras.backend as K
        if K.backend() == 'tensorflow':
            # tensorflow: (batch, dim00, dim01, channels)
            im0_classif = np.expand_dims(im0_roi, 0)
            im1_classif = np.expand_dims(im1_roi, 0)
        else:
            # theano: (batch, channels, dim00, dim01)
            im0_classif = np.expand_dims(np.rollaxis(im0_roi, -1), 0)
            im1_classif = np.expand_dims(np.rollaxis(im1_roi, -1), 0)

        f0 = model.predict(im0_classif)
        f1 = model.predict(im1_classif)

        assert np.isnan(f0).sum() == 0 and np.isnan(f1).sum() == 0, \
            'appearance feature was NaN'
        assert np.isinf(f0).sum() == 0 and np.isinf(f1).sum() == 0, \
            'appearance feature was inf'

        # flatten to 1-D; scipy's cosine distance expects 1-D vectors
        cd = cosine_dist(np.ravel(f0), np.ravel(f1))

        return cd
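The reflect-padding plus index-offset step above is easy to verify in isolation: padding by `window_sz` on every side shifts each original pixel index by `window_sz`, so a bbox touching the image border can still be cropped without running past the edge. A toy numpy check:

import numpy as np

im = np.stack([np.arange(16.0).reshape(4, 4)] * 3, -1)  # fake RGB image
window_sz = 2
imp = np.pad(im, ((window_sz, window_sz), (window_sz, window_sz), (0, 0)),
             mode='reflect')

bbox = np.array([0, 0, 3, 3])  # (min_row, min_col, max_row, max_col)
roi = imp[bbox[0] + window_sz:bbox[2] + window_sz,
          bbox[1] + window_sz:bbox[3] + window_sz, :]
# the shifted crop of the padded image equals the crop of the original
assert np.array_equal(roi, im[0:3, 0:3, :])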
Example #6
from scipy.spatial.distance import cosine as cosine_dist


def cosine(u, v):
    '''
    Returns the cosine similarity between vectors u and v.
    '''
    return 1 - cosine_dist(u, v)
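As a sanity check, this agrees with the dot-product definition of cosine similarity, u.v / (|u| |v|):

import numpy as np

u, v = np.array([1.0, 2.0, 3.0]), np.array([3.0, 2.0, 1.0])
direct = np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
assert np.isclose(cosine(u, v), direct)  # both give 10/14 ~ 0.714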
Example #7
import numpy as np
from scipy.spatial.distance import cosine as cosine_dist


def similarity(v1, v2):
    # rescale cosine similarity from [-1, 1] to [0, 1];
    # all-zero vectors (undefined cosine distance) score 0
    score = 0.0
    if np.count_nonzero(v1) != 0 and np.count_nonzero(v2) != 0:
        score = ((1 - cosine_dist(v1, v2)) + 1) / 2
    return score
import time

start_time = time.time()

# x_grads / y_grads are assumed to be precomputed gradient maps of shape
# (n_rows, n_cols): compare each pixel's gradient vector with its
# neighbors' via cosine distance.
for row_idx in range(n_rows):

    if row_idx % 100 == 0:
        print(row_idx)

    for col_idx in range(n_cols):

        curr_grad = (x_grads[row_idx, col_idx], y_grads[row_idx, col_idx])

        if col_idx > 0:
            left_grad = (x_grads[row_idx, col_idx - 1], y_grads[row_idx,
                                                                col_idx - 1])
            left_dist = cosine_dist(curr_grad, left_grad)
        else:
            left_dist = 0.0

        if col_idx < (n_cols - 1):
            right_grad = (x_grads[row_idx, col_idx + 1], y_grads[row_idx,
                                                                 col_idx + 1])
            right_dist = cosine_dist(curr_grad, right_grad)
        else:
            right_dist = 0.0

        if row_idx > 0:
            top_grad = (x_grads[row_idx - 1, col_idx], y_grads[row_idx - 1,
                                                               col_idx])
            top_dist = cosine_dist(curr_grad, top_grad)
        else:
            top_dist = 0.0
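The loop above breaks off after the top-neighbor case, but the pattern is clear; for larger images the same left/right/top comparisons can be vectorized in plain numpy instead of making one scipy call per pixel. A sketch under the same assumptions (x_grads / y_grads are 2-D gradient maps; pixels with zero gradient yield NaN, just as with scipy's cosine):

import numpy as np


def neighbor_cosine_dists(x_grads, y_grads):
    # stack the gradient maps into an (n_rows, n_cols, 2) field of vectors
    g = np.stack([x_grads, y_grads], axis=-1)

    def cos_dist(a, b):
        # elementwise cosine distance between two fields of 2-D vectors
        num = np.sum(a * b, axis=-1)
        den = np.linalg.norm(a, axis=-1) * np.linalg.norm(b, axis=-1)
        return 1 - num / den

    left = np.zeros(g.shape[:2])
    left[:, 1:] = cos_dist(g[:, 1:], g[:, :-1])     # vs. left neighbor
    right = np.zeros(g.shape[:2])
    right[:, :-1] = cos_dist(g[:, :-1], g[:, 1:])   # vs. right neighbor
    top = np.zeros(g.shape[:2])
    top[1:, :] = cos_dist(g[1:, :], g[:-1, :])      # vs. top neighbor
    return left, right, top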
Example #9
    def cosine_sim(self, w1, w2):
        # cosine similarity between the embedding vectors of w1 and w2
        return 1 - cosine_dist(self.all_embed[self.stoi(w1)],
                               self.all_embed[self.stoi(w2)])
Example #10
from scipy.spatial.distance import cosine as cosine_dist


def cosine_similarity(tag_vector_a, tag_vector_b):
    # cosine similarity = 1 - cosine distance
    return 1 - cosine_dist(tag_vector_a, tag_vector_b)

# example: print(cosine_similarity([0, 0, 1], [1, 2, 1]))
Example #11
import numpy as np
from gensim.models import keyedvectors
from nltk.corpus import wordnet as wn
from scipy.spatial.distance import cosine as cosine_dist
from scipy.stats import pearsonr, spearmanr
# `u` (project utils module) and EMBEDDING_SIZE are assumed module-level


def score(resource_path, embeddings_path, simtest_path, use_UNK=False):
    """
    Compares gold scores from a similarity test set against the maximum
    cosine similarity between all pairs of vectors associated with the two
    words' synsets, reporting Spearman and Pearson coefficients.
    :param resource_path: Path to the resource folder
    :param embeddings_path: Path to the embeddings.vec file
    :param simtest_path: Path to the similarity test file
    :param use_UNK: unknown synset vector -> True: use the vector associated with <UNK>; False: 0-valued vector (default)
    :return: None
    """

    print("Loading vocabularies...")
    word_to_ix, _, _ = u.get_vocab(vocab_path=resource_path + "/vocab.txt",
                                   antivocab_path=resource_path +
                                   "/antivocab.txt")

    print("Loading mappings...")
    mapping, reverse_mapping = u.get_WN_mappings(resource_path +
                                                 "/bn2wn_mapping.txt",
                                                 with_reverse=True)

    print("Loading embeddings...")
    embeddings = keyedvectors.KeyedVectors.load_word2vec_format(
        embeddings_path, binary=False)

    print("Computing similarities...")
    gold_scores = []
    scores = []
    for word_pair in u.wordsim_pairs_generator(simtest_path):
        curr_score = -1
        for syn1 in wn.synsets(word_pair.word1):
            wn_id1 = "%d%s" % (syn1.offset(), syn1.pos())
            bn_id1 = reverse_mapping.get(wn_id1, None)

            # 0-valued vector in case no BabelNet ID is found
            vector1 = np.zeros(shape=[EMBEDDING_SIZE], dtype=float)

            if bn_id1 is None:
                if use_UNK:
                    vector1 = embeddings.get_vector("<UNK>")
            else:
                for word in word_to_ix.keys():
                    if word.find(bn_id1) > 0:
                        vector1 = embeddings.get_vector(word)
                        break

            for syn2 in wn.synsets(word_pair.word2):
                wn_id2 = "%d%s" % (syn2.offset(), syn2.pos())
                bn_id2 = reverse_mapping.get(wn_id2, None)

                # 0-valued vector in case no BabelNet ID is found
                vector2 = np.zeros(shape=[EMBEDDING_SIZE], dtype=float)

                if bn_id2 is None:
                    if use_UNK:
                        vector2 = embeddings.get_vector("<UNK>")
                else:
                    for word in word_to_ix.keys():
                        if word.find(bn_id2) > 0:
                            vector2 = embeddings.get_vector(word)
                            break

                cos_sim = 1.0 - cosine_dist(vector1, vector2)
                curr_score = max(curr_score, cos_sim)

        gold_scores.append(word_pair.score)
        scores.append(curr_score)

    # compute spearman and pearson coefficients
    print(
        "\nSpearman: %.3f\nPearson: %.3f" %
        (spearmanr(gold_scores, scores)[0], pearsonr(gold_scores, scores)[0]))
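The kernel of the loop above, taking the maximum cosine similarity over all pairs of synset vectors for two words, can be isolated into a small helper. A sketch with a hypothetical name (`max_pair_similarity`); unlike the loop above it skips all-zero fallback vectors instead of letting cosine_dist emit NaN:

import numpy as np
from scipy.spatial.distance import cosine as cosine_dist


def max_pair_similarity(vectors1, vectors2):
    best = -1.0
    for v1 in vectors1:
        for v2 in vectors2:
            # cosine distance is undefined for all-zero vectors
            if not np.any(v1) or not np.any(v2):
                continue
            best = max(best, 1.0 - cosine_dist(v1, v2))
    return best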