Пример #1
0
    def get_nearest_neighbor(self, x_test, k, sample_class):
        """Return the indices of the samples in ``sample_class`` nearest to ``x_test``.

        Samples for which no element differs from ``x_test`` are skipped, so an
        exact copy of the query is never reported as a neighbour. The metric
        is chosen by ``self.distance_calculator`` and looked up on ``dis``
        (presumably ``scipy.spatial.distance`` -- confirm against the imports).

        Args:
            x_test: Query vector.
            k: Maximum number of neighbours to return.
            sample_class: Indexable collection of vectors; row ``i`` is read
                as ``sample_class[i]`` and must support element-wise ``!=``
                against ``x_test``.

        Returns:
            list: Up to ``k`` indices into ``sample_class`` ordered by
            increasing distance (ties are broken by the lower index). Fewer
            than ``k`` indices are returned when fewer candidates exist.

        Raises:
            ValueError: If ``self.distance_calculator`` is not a supported
                metric name.
        """
        # Supported ``distance_calculator`` spellings -> function name on ``dis``.
        # The function is fetched lazily so only the selected metric has to
        # exist in the installed library.
        metric_names = {
            'jaccard': 'jaccard',
            'dice': 'dice',
            'correlation': 'correlation',
            'yule': 'yule',
            'russelo-rao': 'russellrao',
            'sokal-michener': 'sokalmichener',
            'rogers-tanimoto': 'rogerstanimoto',
            'kulzinsky': 'kulsinski',
        }
        metric_name = metric_names.get(self.distance_calculator)
        if metric_name is None:
            # Previously this fell through every branch and crashed later on
            # an unbound ``distance`` variable.
            raise ValueError(
                'Unsupported distance calculator: {!r}'.format(
                    self.distance_calculator))
        metric = getattr(dis, metric_name)

        distances = []
        # Index access (rather than iteration) is kept on purpose: the returned
        # values are positions usable with ``sample_class[i]``.
        for i in range(len(sample_class)):
            sample = sample_class[i]
            if (sample != x_test).any():
                distances.append([metric(x_test, sample), i])

        # Sorting the [distance, index] pairs orders by distance, then index.
        distances.sort()
        # Clamp at zero so a negative ``k`` yields no neighbours, and rely on
        # slicing so an oversized ``k`` no longer raises IndexError.
        return [index for _, index in distances[:max(k, 0)]]
Пример #2
0
    def yule(self, x=None, y=None, w=None):
        """
        Yule dissimilarity.

        Each argument that is left as ``None`` falls back to the matching
        instance attribute (``self.x``, ``self.y``, ``self.w``). Explicitly
        passed values are always used as given, including NumPy arrays and
        falsy values such as an empty list.

        Example inputs:

        x = [1, 0, 0]
        y = [0, 1, 0]

        Returns the result of ``distance.yule(x, y, w)``.
        """
        # ``is None`` instead of ``or``: the truth value of a multi-element
        # array is ambiguous and would raise, and falsy inputs must not be
        # silently swapped for the instance defaults.
        x = self.x if x is None else x
        y = self.y if y is None else y
        w = self.w if w is None else w
        return distance.yule(x, y, w)
Пример #3
0
def cross_channel_boolean_distance_features(mask):
    """Compute pairwise distances between the channels of a mask.

    Every unordered pair of channels is flattened to 1D and compared with a
    fixed set of metrics taken from ``dist``.

    Parameters
    ----------
    mask : 3D array, shape (M, N, C)
        The input mask with multiple channels.

    Returns
    -------
    features : dict
        Maps ``"<metric>_distance_Ch<i>_Ch<j>"`` (1-based channel numbers,
        ``i < j``) to the corresponding distance value. Empty when the mask
        has fewer than two channels.

    """
    # Metric names double as the attribute looked up on ``dist`` and as the
    # prefix of the feature key; the order fixes the key insertion order.
    metric_names = (
        "dice",
        "hamming",
        "jaccard",
        "kulsinski",
        "rogerstanimoto",
        "russellrao",
        "sokalmichener",
        "sokalsneath",
        "yule",
    )

    n_channels = mask.shape[2]
    features = {}
    for first in range(n_channels):
        for second in range(first + 1, n_channels):
            # reshaping the two channels to 1D
            flat_first = mask[:, :, first].ravel()
            flat_second = mask[:, :, second].ravel()

            # human-readable suffix identifying the channel pair
            pair_tag = "_Ch" + str(first + 1) + "_Ch" + str(second + 1)

            for metric in metric_names:
                features[metric + "_distance" + pair_tag] = getattr(
                    dist, metric)(flat_first, flat_second)

    return features
Пример #4
0
 def calculate_pss(self,
                   profile,
                   ignore=None,
                   method="pairwise"):
     """
     Calculate Profiles Similarity Score.

     Args:
         profile: The other profile; must have the same ``len()`` as ``self``
             and expose ``profile`` (values) and ``query`` (element labels,
             searched with ``query.index``).
         ignore: Optional iterable of query elements to leave out of the
             comparison. Each profile resolves the elements against its own
             ``query``.
         method: ``"pairwise"`` counts positions holding equal values; one of
             ``"jaccard"``, ``"yule"``, ``"dice"``, ``"hamming"``,
             ``"kulsinski"``, ``"rogerstanimoto"``, ``"russellrao"`` or
             ``"sokalmichener"`` delegates to the same-named ``dist`` function.

     Returns:
         The score for the chosen method, or ``None`` when ``method`` is not
         one of the names above.

     Raises:
         ProfileError: If the profiles differ in length or an element of
             ``ignore`` is missing from a profile's ``query``.

     Neither ``self`` nor ``profile`` is modified; ignored elements are
     dropped from local copies only.
     """
     if len(self) != len(profile):
         raise ProfileError("Different profiles' lengths")

     def _without_ignored(prof):
         # Return prof's values with the ignored elements removed.
         if not ignore:
             return prof.profile
         try:
             # Resolve every position against the untouched query first, so
             # removing one element cannot shift the index of the next one.
             skipped = {prof.query.index(element) for element in ignore}
         except ValueError:
             # ``index`` signals a missing element with ValueError.
             raise ProfileError("Element to ignore not in profile")
         return tuple(value
                      for position, value in enumerate(prof.profile)
                      if position not in skipped)

     values_1 = _without_ignored(self)
     values_2 = _without_ignored(profile)

     if method == "pairwise":
         return sum(a == b for a, b in zip(values_1, values_2))

     dist_methods = ("jaccard", "yule", "dice", "hamming", "kulsinski",
                     "rogerstanimoto", "russellrao", "sokalmichener")
     if method in dist_methods:
         # Looked up lazily so only the requested metric has to exist.
         return getattr(dist, method)(values_1, values_2)
     return None
Пример #5
0
def do_yule(m, yule, vec):
    """Fill ``yule`` with the pairwise Yule values of the first ``m`` vectors.

    For every ``row`` and ``col`` in ``range(m)`` the entry
    ``yule[row, col]`` is set to ``distance.yule(vec[row], vec[col])``.
    The container is modified in place and also returned.
    """
    indices = range(m)
    for row in indices:
        left = vec[row]
        for col in indices:
            yule[row, col] = distance.yule(left, vec[col])
    return yule
Пример #6
0
def yule(app1SyscallsVector, app2SyscallsVector):
    """Return ``spDist.yule`` applied to the two syscall vectors."""
    dissimilarity = spDist.yule(app1SyscallsVector, app2SyscallsVector)
    return dissimilarity
Пример #7
0
def _distances_to_first(metric, vectors):
    """Apply ``metric`` between the first vector and every vector.

    Both operands are converted with ``.toarray()`` on each iteration
    (presumably the rows are SciPy sparse matrices -- confirm).
    """
    return [metric(vectors[0].toarray(), row.toarray()) for row in vectors]


# Distance of every question / answer vector to the first one, per metric.
Qcanberra = _distances_to_first(dist.canberra, QuestionTVectorArray)
Acanberra = _distances_to_first(dist.canberra, AnswerTVectorArray)

Qhamming = _distances_to_first(dist.hamming, QuestionTVectorArray)
Ahamming = _distances_to_first(dist.hamming, AnswerTVectorArray)

Qcorrelation = _distances_to_first(dist.correlation, QuestionTVectorArray)
Acorrelation = _distances_to_first(dist.correlation, AnswerTVectorArray)

Qcityblock = _distances_to_first(dist.cityblock, QuestionTVectorArray)
Acityblock = _distances_to_first(dist.cityblock, AnswerTVectorArray)

Qdice = _distances_to_first(dist.dice, QuestionTVectorArray)
Adice = _distances_to_first(dist.dice, AnswerTVectorArray)

Qyule = _distances_to_first(dist.yule, QuestionTVectorArray)
Ayule = _distances_to_first(dist.yule, AnswerTVectorArray)

#C_Q=np.histogram2d(QuestionTVectorArray[1],QuestionTVectorArray[1])[0]

#print "question mutual info-->",mutual_info_score(None,None,contigency=C_Q)#QuestionTVectorArray[0:1],QuestionTVectorArray)
#QuestionVectorArray=Qvectorizer.fit_transform(all_questions).toarray()
#AnswerVectorArray=Avectorizer.fit_transform(all_answers).toarray()

#QUserinputVectorArray=Qvectorizer.transform(userinput).toarray()
#AUserinputVectorArray=Avectorizer.transform(userinput).toarray()
#cx=lambda a,b:round(np.inner(a,b)/(LA.norm(a)*LA.norm(b)),3)
"""
mincosine=1
minques=0
for Qv in range(len(QuestionVectorArray)):
def yule_(x, y):
    """Return ``yule(x, y)``, or 0 if the call raises ZeroDivisionError."""
    try:
        result = yule(x, y)
    except ZeroDivisionError:
        # Degenerate input: report 0 instead of propagating the error.
        result = 0
    return result
Пример #9
0
def exec_similarity(dct, algorithm):
    """Score every answer in ``dct['answers']`` against ``dct`` itself.

    When ``validate_similarity_algorithms(dct, algorithm)`` is truthy an
    empty dict is returned and nothing is computed. Otherwise, for each
    answer, the selected metric is called with
    ``ndarray_dict(dct['tf_idf'])`` and ``ndarray_dict(answer['tf_idf'])``
    and the value is stored on the answer under the key ``algorithm`` via
    ``answer.update``.

    Returns:
        ``{}`` when validation short-circuits; ``None`` when ``algorithm``
        matches none of the handled names; otherwise the list of
        ``answer.update(...)`` return values, one per answer (these are
        ``None`` if the answers are plain dicts -- the useful output is the
        in-place update).
    """
    if validate_similarity_algorithms(dct, algorithm):
        return {}

    # Each entry is wrapped in a lambda so the metric name is resolved only
    # when that algorithm is actually selected and called.
    # NOTE(review): if these names are the scipy.spatial.distance functions,
    # mahalanobis, seuclidean and wminkowski need extra arguments beyond the
    # two vectors -- confirm.
    # NOTE: 'minkowski' has no handler; its branch was commented out.
    handlers = {
        'braycurtis': lambda u, v: braycurtis(u, v),
        'canberra': lambda u, v: canberra(u, v),
        'chebyshev': lambda u, v: chebyshev(u, v),
        'cityblock': lambda u, v: cityblock(u, v),
        'correlation': lambda u, v: correlation(u, v),
        'cosine': lambda u, v: cosine(u, v),
        'euclidean': lambda u, v: euclidean(u, v),
        'mahalanobis': lambda u, v: mahalanobis(u, v),
        'seuclidean': lambda u, v: seuclidean(u, v),
        'sqeuclidean': lambda u, v: sqeuclidean(u, v),
        'wminkowski': lambda u, v: wminkowski(u, v),
        'dice': lambda u, v: dice(u, v),
        'hamming': lambda u, v: hamming(u, v),
        'jaccard': lambda u, v: jaccard(u, v),
        'kulsinski': lambda u, v: kulsinski(u, v),
        'rogerstanimoto': lambda u, v: rogerstanimoto(u, v),
        'russellrao': lambda u, v: russellrao(u, v),
        'sokalmichener': lambda u, v: sokalmichener(u, v),
        'sokalsneath': lambda u, v: sokalsneath(u, v),
        'yule': lambda u, v: yule(u, v),
    }

    metric = handlers.get(algorithm)
    if metric is None:
        return None

    outcomes = []
    for answer in dct['answers']:
        score = metric(ndarray_dict(dct['tf_idf']),
                       ndarray_dict(answer['tf_idf']))
        outcomes.append(answer.update({algorithm: score}))
    return outcomes
 def distance(self, vector1, vector2, type_):
     """
     Calculate distance between two vectors.

     Args:
         vector1 (list of int/float/bool): Vector in vector space
         vector2 (list of int/float/bool): Vector in vector space
         type_ (str): Type of distance calculation. Allowed types are:
             * For numeric vectors *
             - braycurtis: Computes the Bray-Curtis distance between two arrays.
             - canberra: Computes the Canberra distance between two arrays.
             - chebyshev: Computes the Chebyshev distance.
             - cityblock: Computes the City Block (Manhattan) distance.
             - correlation: Computes the correlation distance between two arrays.
             - cosine: Computes the Cosine distance between arrays.
             - euclidean: Computes the Euclidean distance between two arrays.
             - sqeuclidean: Computes the squared Euclidean distance between two arrays.

             * For boolean vectors *
             - dice: Computes the Dice dissimilarity between two boolean arrays.
             - hamming: Computes the Hamming distance between two arrays.
             - jaccard: Computes the Jaccard-Needham dissimilarity between two boolean arrays.
             - kulsinski: Computes the Kulsinski dissimilarity between two boolean arrays.
             - rogerstanimoto: Computes the Rogers-Tanimoto dissimilarity between two boolean arrays.
             - russellrao: Computes the Russell-Rao dissimilarity between two boolean arrays.
             - sokalmichener: Computes the Sokal-Michener dissimilarity between two boolean arrays.
             - sokalsneath: Computes the Sokal-Sneath dissimilarity between two boolean arrays.
             - yule: Computes the Yule dissimilarity between two boolean arrays.

     Returns:
         float: Distance between vectors.

     Raises:
         ValueError: If ``type_`` is not one of the allowed types.
     """
     # Every allowed type is also the name of the function on the
     # module-level ``distance`` object that computes it.
     supported_types = (
         "braycurtis", "canberra", "chebyshev", "cityblock", "correlation",
         "cosine", "euclidean", "sqeuclidean",
         "dice", "hamming", "jaccard", "kulsinski", "rogerstanimoto",
         "russellrao", "sokalmichener", "sokalsneath", "yule",
     )
     if type_ not in supported_types:
         raise ValueError(
             """Wrong value for type_. Please enter one of supported values.
                          Type help(distance) to see supported values.""")
     # Inside the method body ``distance`` is the module-level name, not this
     # method; the function is fetched only after validation.
     return getattr(distance, type_)(vector1, vector2)
Пример #11
0
def main():
    """Print each ``Distance`` result next to SciPy's for the same inputs.

    For every metric, two lines are printed (``My`` and ``SciPy``) followed
    by a separator line, using two fixed three-element vectors.
    """
    from scipy.spatial import distance
    a = np.array([1, 2, 43])
    b = np.array([3, 2, 1])

    d = Distance()
    separator = '-----------------------------------------------------------------'
    iv = np.array([[1, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 1]])

    # (printed label, function name, extra args for Distance, extra args for SciPy)
    comparisons = [
        ('braycurtis', 'braycurtis', (), ()),
        ('canberra', 'canberra', (), ()),
        ('chebyshev', 'chebyshev', (), ()),
        ('cityblock', 'cityblock', (), ()),
        ('correlation', 'correlation', (), ()),
        ('euclidean', 'euclidean', (), ()),
        ('hamming', 'hamming', (), ()),
        ('jaccard', 'jaccard', (), ()),
        # "manhattan" is reported through the cityblock implementation
        ('manhattan', 'cityblock', (), ()),
        ('cosine', 'cosine', (), ()),
        ('dice', 'dice', (), ()),
        ('kulsinski', 'kulsinski', (), ()),
        ('mahalanobis', 'mahalanobis', (iv,), (iv,)),
        ('seuclidean', 'seuclidean',
         (np.array([0.1, 0.1, 0.1]),), ([0.1, 0.1, 0.1],)),
        ('sokalmichener', 'sokalmichener', (), ()),
        ('sokal_sneath', 'sokalsneath', (), ()),
        ('sqeuclidean', 'sqeuclidean', (), ()),
        ('minkowski', 'minkowski', (2,), (2,)),
        ('rogerstanimoto', 'rogerstanimoto', (), ()),
        ('russellrao', 'russellrao', (), ()),
        ('wminkowski', 'wminkowski', (2, np.ones(3)), (2, np.ones(3))),
        ('yule', 'yule', (), ()),
    ]

    print(separator)
    for label, func_name, my_extra, scipy_extra in comparisons:
        # Functions are fetched right before use so a missing one fails at
        # the same point in the output as a direct attribute access would.
        my_value = getattr(d, func_name)(a, b, *my_extra)
        print('My       {}: {}'.format(label, my_value))
        scipy_value = getattr(distance, func_name)(a, b, *scipy_extra)
        print('SciPy    {}: {}'.format(label, scipy_value))
        print(separator)
Пример #12
0
def my_dist(u, v):
    """Return the product of four distance measures between ``u`` and ``v``.

    The factors are ``cosine``, ``yule``, ``braycurtis`` and the absolute
    value of ``rogerstanimoto``, multiplied in that order.
    """
    cosine_part = cosine(u, v)
    yule_part = yule(u, v)
    bray_curtis_part = braycurtis(u, v)
    rogers_tanimoto_part = np.abs(rogerstanimoto(u, v))
    return cosine_part * yule_part * bray_curtis_part * rogers_tanimoto_part
Пример #13
0
def yule_(x, y):
    """Compute ``yule(x, y)``, falling back to 0 on ZeroDivisionError."""
    try:
        value = yule(x, y)
    except ZeroDivisionError:
        # A zero denominator is reported as 0 rather than raised.
        return 0
    else:
        return value