def get_nearest_neighbor(self, x_test, k, sample_class):
    """Return the indices of the ``k`` samples closest to ``x_test``.

    Samples that are element-wise identical to ``x_test`` are skipped, so a
    query point taken from ``sample_class`` never selects itself.

    Args:
        x_test: Query vector. Compared element-wise with every sample, so it
            is assumed to be a NumPy array of the same shape as one sample
            -- TODO confirm against callers.
        k: Maximum number of neighbour indices to return.
        sample_class: Indexable collection of sample vectors.

    Returns:
        list: Positions within ``sample_class`` of the nearest samples,
        ordered by increasing distance (ties broken by position). Fewer than
        ``k`` entries are returned when fewer candidates exist.

    Raises:
        ValueError: If ``self.distance_calculator`` is not a supported name.
    """
    # Configured name -> function name on the ``dis`` module. Names are
    # resolved lazily with getattr so a function that is missing from the
    # installed library only fails when that metric is actually requested.
    metric_names = {
        'jaccard': 'jaccard',
        'dice': 'dice',
        'correlation': 'correlation',
        'yule': 'yule',
        'russelo-rao': 'russellrao',
        'sokal-michener': 'sokalmichener',
        'rogers-tanimoto': 'rogerstanimoto',
        'kulzinsky': 'kulsinski',
    }
    calculator = self.distance_calculator
    if calculator not in metric_names:
        # Previously this fell through the elif chain and either crashed on
        # an unbound local or silently reused the previous sample's distance.
        raise ValueError(
            "Unsupported distance_calculator: {!r}".format(calculator))
    metric = getattr(dis, metric_names[calculator])

    distances = [
        (metric(x_test, sample), index)
        for index, sample in enumerate(sample_class)
        if (sample != x_test).any()
    ]
    distances.sort()

    # Slicing (instead of indexing 0..k-1) tolerates k larger than the number
    # of candidates; max() keeps a negative k from selecting from the end.
    return [index for _, index in distances[:max(k, 0)]]
def yule(self, x=None, y=None, w=None):
    """Return the Yule dissimilarity of ``x`` and ``y`` via ``distance.yule``.

    Any argument left as ``None`` falls back to the attribute of the same
    name on the instance (``self.x``, ``self.y``, ``self.w``).

    Example inputs::

        x = [1, 0, 0]
        y = [0, 1, 0]

    Args:
        x: First vector, or ``None`` to use ``self.x``.
        y: Second vector, or ``None`` to use ``self.y``.
        w: Weights passed as the third argument, or ``None`` to use
            ``self.w``.

    Returns:
        Whatever ``distance.yule`` returns (``distance`` is presumably
        ``scipy.spatial.distance`` -- confirm against the file's imports).
    """
    # Explicit ``is None`` checks: ``x or self.x`` raises ValueError for
    # multi-element NumPy arrays (ambiguous truth value) and would silently
    # discard legitimate falsy arguments.
    x = self.x if x is None else x
    y = self.y if y is None else y
    w = self.w if w is None else w
    return distance.yule(x, y, w)
def cross_channel_boolean_distance_features(mask):
    """Compute pairwise distance features between the channels of a mask.

    Every unordered pair of channels ``(i, j)`` with ``i < j`` is flattened
    to 1D and compared with each metric listed below, looked up on the
    ``dist`` module.

    Parameters
    ----------
    mask : 3D array, shape (M, N, C)
        The input mask with multiple channels.

    Returns
    -------
    features : dict
        Maps ``"<metric>_distance_Ch<i>_Ch<j>"`` (1-based channel numbers)
        to the value returned by the corresponding ``dist`` function.
    """
    # Order matters: it fixes both the call order and the key order.
    metric_names = (
        "dice",
        "hamming",
        "jaccard",
        "kulsinski",
        "rogerstanimoto",
        "russellrao",
        "sokalmichener",
        "sokalsneath",
        "yule",
    )
    n_channels = mask.shape[2]
    features = {}
    for first in range(n_channels):
        flat_first = mask[:, :, first].ravel()
        for second in range(first + 1, n_channels):
            flat_second = mask[:, :, second].ravel()
            # Human-readable, 1-based channel pair tag.
            suffix = "_Ch{}_Ch{}".format(first + 1, second + 1)
            for metric in metric_names:
                key = metric + "_distance" + suffix
                features[key] = getattr(dist, metric)(flat_first, flat_second)
    return features
def calculate_pss(self, profile, ignore=None, method="pairwise"):
    """Calculate the Profiles Similarity Score between ``self`` and ``profile``.

    Args:
        profile: The other profile object. Like ``self`` it must support
            ``len()`` and expose ``query`` and ``profile`` attributes.
        ignore: Optional iterable of ``query`` elements whose positions are
            left out of the comparison. Neither ``self`` nor ``profile`` is
            modified.
        method: ``"pairwise"`` counts the positions at which both profiles
            hold equal values. ``"jaccard"``, ``"yule"``, ``"dice"``,
            ``"hamming"``, ``"kulsinski"``, ``"rogerstanimoto"``,
            ``"russellrao"`` and ``"sokalmichener"`` delegate to the function
            of the same name on the ``dist`` module.

    Returns:
        The pairwise match count or the value returned by the ``dist``
        function; ``None`` when ``method`` is not one of the names above.

    Raises:
        ProfileError: If the profiles differ in length, or an element of
            ``ignore`` is missing from either profile's ``query``.
    """
    if len(self) != len(profile):
        raise ProfileError("Different profiles' lengths")

    def _visible_values(prof):
        # Values of ``prof.profile`` with the ignored positions dropped.
        # Works on a copy: the earlier in-place deletion permanently shrank
        # the caller's objects and, because ``query`` was not shrunk with it,
        # later ``ignore`` items removed the wrong position.
        if not ignore:
            return prof.profile
        try:
            skipped = set(prof.query.index(item) for item in ignore)
        except ValueError:
            # ``.index`` signals a missing element with ValueError (not
            # IndexError), so this is the error that must be translated.
            raise ProfileError("Element to ignore not in profile")
        return tuple(
            value
            for position, value in enumerate(prof.profile)
            if position not in skipped
        )

    own_values = _visible_values(self)
    other_values = _visible_values(profile)

    if method == "pairwise":
        return sum(a == b for a, b in zip(own_values, other_values))

    dist_methods = (
        "jaccard",
        "yule",
        "dice",
        "hamming",
        "kulsinski",
        "rogerstanimoto",
        "russellrao",
        "sokalmichener",
    )
    for name in dist_methods:
        if method == name:
            return getattr(dist, name)(own_values, other_values)
    # Unknown method names keep yielding None, as before.
    return None
def do_yule(m, yule, vec):
    """Fill ``yule[i, j]`` with ``distance.yule(vec[i], vec[j])``.

    Every index pair ``(i, j)`` with ``0 <= i, j < m`` is written, row by
    row. The container passed as ``yule`` is modified in place and also
    returned.
    """
    indices = range(m)
    for row in indices:
        left = vec[row]
        for col in indices:
            yule[row, col] = distance.yule(left, vec[col])
    return yule
def yule(app1SyscallsVector, app2SyscallsVector):
    """Return ``spDist.yule`` applied to the two applications' vectors."""
    dissimilarity = spDist.yule(app1SyscallsVector, app2SyscallsVector)
    return dissimilarity
Qcanberra=[dist.canberra(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Acanberra=[dist.canberra(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] Qhamming=[dist.hamming(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Ahamming=[dist.hamming(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] Qcorrelation=[dist.correlation(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Acorrelation=[dist.correlation(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] Qcityblock=[dist.cityblock(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Acityblock=[dist.cityblock(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] Qdice=[dist.dice(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Adice=[dist.dice(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] Qyule=[dist.yule(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray] Ayule=[dist.yule(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray] #C_Q=np.histogram2d(QuestionTVectorArray[1],QuestionTVectorArray[1])[0] #print "question mutual info-->",mutual_info_score(None,None,contigency=C_Q)#QuestionTVectorArray[0:1],QuestionTVectorArray) #QuestionVectorArray=Qvectorizer.fit_transform(all_questions).toarray() #AnswerVectorArray=Avectorizer.fit_transform(all_answers).toarray() #QUserinputVectorArray=Qvectorizer.transform(userinput).toarray() #AUserinputVectorArray=Avectorizer.transform(userinput).toarray() #cx=lambda a,b:round(np.inner(a,b)/(LA.norm(a)*LA.norm(b)),3) """ mincosine=1 minques=0 for Qv in range(len(QuestionVectorArray)):
def yule_(x, y):
    """Return ``yule(x, y)``, or ``0`` if it raises ``ZeroDivisionError``."""
    try:
        result = yule(x, y)
    except ZeroDivisionError:
        # Degenerate input: report a zero dissimilarity instead of failing.
        result = 0
    return result
def exec_similarity(dct, algorithm):
    """Store an ``algorithm`` score on every answer in ``dct['answers']``.

    For each answer, the selected function is called with
    ``ndarray_dict(dct['tf_idf'])`` and ``ndarray_dict(answer['tf_idf'])``
    and the result is written to ``answer[algorithm]``.

    Returns:
        ``{}`` when ``validate_similarity_algorithms(dct, algorithm)`` is
        truthy. Otherwise a list with one entry per answer; the entries are
        the return values of ``dict.update`` (i.e. ``None``), the useful
        output being the in-place update of the answers. ``None`` when
        ``algorithm`` is not a supported name.
    """
    if validate_similarity_algorithms(dct, algorithm):
        return {}

    # Lambdas defer the name lookup, so only the requested function has to
    # exist at call time. 'minkowski' is intentionally not offered.
    # NOTE(review): if these are SciPy's functions, mahalanobis, seuclidean
    # and wminkowski need more than two arguments -- confirm.
    metrics = {
        'braycurtis': lambda u, v: braycurtis(u, v),
        'canberra': lambda u, v: canberra(u, v),
        'chebyshev': lambda u, v: chebyshev(u, v),
        'cityblock': lambda u, v: cityblock(u, v),
        'correlation': lambda u, v: correlation(u, v),
        'cosine': lambda u, v: cosine(u, v),
        'euclidean': lambda u, v: euclidean(u, v),
        'mahalanobis': lambda u, v: mahalanobis(u, v),
        'seuclidean': lambda u, v: seuclidean(u, v),
        'sqeuclidean': lambda u, v: sqeuclidean(u, v),
        'wminkowski': lambda u, v: wminkowski(u, v),
        'dice': lambda u, v: dice(u, v),
        'hamming': lambda u, v: hamming(u, v),
        'jaccard': lambda u, v: jaccard(u, v),
        'kulsinski': lambda u, v: kulsinski(u, v),
        'rogerstanimoto': lambda u, v: rogerstanimoto(u, v),
        'russellrao': lambda u, v: russellrao(u, v),
        'sokalmichener': lambda u, v: sokalmichener(u, v),
        'sokalsneath': lambda u, v: sokalsneath(u, v),
        'yule': lambda u, v: yule(u, v),
    }
    metric = metrics.get(algorithm)
    if metric is None:
        return None

    return [
        answer.update({
            algorithm: metric(ndarray_dict(dct['tf_idf']),
                              ndarray_dict(answer['tf_idf']))
        })
        for answer in dct['answers']
    ]
def distance(self, vector1, vector2, type_):
    """
    Calculate distance between two vectors.

    Args:
        vector1 (list of int/float/bool): Vector in vector space
        vector2 (list of int/float/bool): Vector in vector space
        type_ (str): Type of distance calculation. Allowed types are:

            * For numeric vectors *
            - braycurtis: Computes the Bray-Curtis distance between two
              arrays.
            - canberra: Computes the Canberra distance between two arrays.
            - chebyshev: Computes the Chebyshev distance.
            - cityblock: Computes the City Block (Manhattan) distance.
            - correlation: Computes the correlation distance between two
              arrays.
            - cosine: Computes the Cosine distance between arrays.
            - euclidean: Computes the Euclidean distance between two arrays.
            - sqeuclidean: Computes the squared Euclidean distance between
              two arrays.

            * For boolean vectors *
            - dice: Computes the Dice dissimilarity between two boolean
              arrays.
            - hamming: Computes the Hamming distance between two arrays.
            - jaccard: Computes the Jaccard-Needham dissimilarity between
              two boolean arrays.
            - kulsinski: Computes the Kulsinski dissimilarity between two
              boolean arrays.
            - rogerstanimoto: Computes the Rogers-Tanimoto dissimilarity
              between two boolean arrays.
            - russellrao: Computes the Russell-Rao dissimilarity between
              two boolean arrays.
            - sokalmichener: Computes the Sokal-Michener dissimilarity
              between two boolean arrays.
            - sokalsneath: Computes the Sokal-Sneath dissimilarity between
              two boolean arrays.
            - yule: Computes the Yule dissimilarity between two boolean
              arrays.

    Returns:
        float: Distance between vectors.

    Raises:
        ValueError: If ``type_`` is not one of the allowed types.
    """
    # Each supported type_ is also the name of the function to call on the
    # module-level ``distance`` object. The previous elif chain listed
    # "kulsinski" twice; the second branch could never be reached.
    supported_types = (
        "braycurtis",
        "canberra",
        "chebyshev",
        "cityblock",
        "correlation",
        "cosine",
        "euclidean",
        "sqeuclidean",
        "dice",
        "hamming",
        "jaccard",
        "kulsinski",
        "rogerstanimoto",
        "russellrao",
        "sokalmichener",
        "sokalsneath",
        "yule",
    )
    for name in supported_types:
        if type_ == name:
            # Look the function up only after validation, so an unsupported
            # type_ always surfaces as ValueError.
            return getattr(distance, name)(vector1, vector2)
    raise ValueError(
        "Wrong value for type_. Please enter one of supported values. "
        "Type help(distance) to see supported values.")
def main():
    """Print each ``Distance`` result next to SciPy's for the same inputs.

    For every metric a separator line is printed, followed by the value from
    the local ``Distance`` implementation and the value from
    ``scipy.spatial.distance``; a final separator closes the report.
    """
    from scipy.spatial import distance

    a = np.array([1, 2, 43])
    b = np.array([3, 2, 1])
    d = Distance()
    rule = '-----------------------------------------------------------------'
    iv = np.array([[1, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 1]])

    # (printed label, function name, extra args for Distance,
    #  extra args for SciPy) -- in the order they are reported.
    cases = [
        ('braycurtis', 'braycurtis', (), ()),
        ('canberra', 'canberra', (), ()),
        ('chebyshev', 'chebyshev', (), ()),
        ('cityblock', 'cityblock', (), ()),
        ('correlation', 'correlation', (), ()),
        ('euclidean', 'euclidean', (), ()),
        ('hamming', 'hamming', (), ()),
        ('jaccard', 'jaccard', (), ()),
        # "manhattan" is reported using the cityblock implementations.
        ('manhattan', 'cityblock', (), ()),
        ('cosine', 'cosine', (), ()),
        ('dice', 'dice', (), ()),
        ('kulsinski', 'kulsinski', (), ()),
        ('mahalanobis', 'mahalanobis', (iv,), (iv,)),
        ('seuclidean', 'seuclidean',
         (np.array([0.1, 0.1, 0.1]),), ([0.1, 0.1, 0.1],)),
        ('sokalmichener', 'sokalmichener', (), ()),
        ('sokal_sneath', 'sokalsneath', (), ()),
        ('sqeuclidean', 'sqeuclidean', (), ()),
        ('minkowski', 'minkowski', (2,), (2,)),
        ('rogerstanimoto', 'rogerstanimoto', (), ()),
        ('russellrao', 'russellrao', (), ()),
        ('wminkowski', 'wminkowski', (2, np.ones(3)), (2, np.ones(3))),
        ('yule', 'yule', (), ()),
    ]

    for label, func_name, own_extra, scipy_extra in cases:
        print(rule)
        own_value = getattr(d, func_name)(a, b, *own_extra)
        print('My {}: {}'.format(label, own_value))
        scipy_value = getattr(distance, func_name)(a, b, *scipy_extra)
        print('SciPy {}: {}'.format(label, scipy_value))
    print(rule)
def my_dist(u, v):
    """Combine four distances between ``u`` and ``v`` into one product.

    Returns ``cosine * yule * braycurtis * |rogerstanimoto|``, multiplied
    left to right in that order.
    """
    product = cosine(u, v)
    product = product * yule(u, v)
    product = product * braycurtis(u, v)
    return product * np.abs(rogerstanimoto(u, v))